[PATCH] [wideint] Implement _cairo_int64x32_128_mul().

Chris Wilson chris at chris-wilson.co.uk
Tue Oct 7 03:20:44 PDT 2008


By implementing a special case for the 64-bit by 32-bit multiply, we can
take advantage of the occasions when a narrower multiply suffices. This
case is interesting because of its heavy use within intersect_lines()
and edges_compare_x_for_y(), which causes it to appear in profiles.
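
For illustration, the narrower multiply needs only two 32x32->64 partial
products where a full 64x64 multiply needs four. Below is a minimal sketch
of the unsigned helper's structure, written with native 64-bit types for
clarity (the patched code targets builds without a native 128-bit type;
the names here are illustrative only, not part of the patch):

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } my_uint128;

    static my_uint128
    umul_64x32 (uint64_t a, uint32_t b)
    {
        uint64_t r0 = (a & 0xffffffff) * b; /* lo(a) * b */
        uint64_t r2 = (a >> 32) * b;        /* hi(a) * b, weight 2^32 */
        /* the sum cannot overflow: (2^32-2) + (2^32-1)^2 < 2^64 */
        uint64_t r1 = (r0 >> 32) + r2;
        my_uint128 s;

        s.hi = r1 >> 32;
        s.lo = (r1 << 32) | (r0 & 0xffffffff);
        return s;
    }

The result is simply r1 * 2^32 + lo(r0), split across the two 64-bit
halves of the 128-bit struct.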

During performance testing the implementation is about 2x faster, simply
because most of the multiplicands fit within 32 bits and are handled by
the _cairo_int32x32_64_mul() fast path. Overall it gives a performance
improvement of ~2%.
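
The signed wrapper multiplies the operands as unsigned values and then
corrects for the bias that two's-complement reinterpretation introduces:
for a < 0, (uint64_t) a == a + 2^64, so the unsigned product carries an
excess b * 2^64; for b < 0 the code first sign-extends b to 64 bits and
cancels the excess a * 2^64. Here is a self-contained check of that
identity, assuming a GCC-style compiler with a native __int128 (exactly
what the patched code cannot rely on); mul_64x32 is a hypothetical
stand-in that mirrors the patch's branches:

    #include <assert.h>
    #include <stdint.h>

    static __int128
    mul_64x32 (int64_t a, int32_t b)
    {
        unsigned __int128 s;

        if (b > 0) {
            s = (unsigned __int128) (uint64_t) a * (uint32_t) b;
        } else if (b < 0) {
            /* widen b by sign extension, then cancel the excess a * 2^64 */
            s = (unsigned __int128) (uint64_t) a * (uint64_t) (int64_t) b;
            s -= (unsigned __int128) (uint64_t) a << 64;
        } else {
            return 0;
        }

        if (a < 0)      /* (uint64_t) a == a + 2^64: cancel the excess b * 2^64 */
            s -= (unsigned __int128) (uint64_t) (int64_t) b << 64;

        return (__int128) s;
    }

    int
    main (void)
    {
        assert (mul_64x32 (-6, 7) == -42);
        assert (mul_64x32 (INT64_MIN / 2, -2) == (__int128) 1 << 63);
        assert (mul_64x32 (INT64_MAX, -1) == -(__int128) INT64_MAX);
        return 0;
    }

Either branch reduces the signed multiply to unsigned arithmetic plus at
most two 64-bit subtractions from the high word, which is what keeps the
special case cheap.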
---
 src/cairo-wideint-private.h |    9 +++++++-
 src/cairo-wideint.c         |   46 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletions(-)

diff --git a/src/cairo-wideint-private.h b/src/cairo-wideint-private.h
index f5aac28..8a19e38 100644
--- a/src/cairo-wideint-private.h
+++ b/src/cairo-wideint-private.h
@@ -72,6 +72,7 @@ cairo_uint64_t I	_cairo_uint64_not (cairo_uint64_t a);
 #define			_cairo_int64_to_uint64(i)   (i)
 
 cairo_int64_t  I	_cairo_int32_to_int64(int32_t i);
+#define			_cairo_int32_to_uint64(i)   (_cairo_int64_to_uint64 (_cairo_int32_to_int64(i)))
 #define			_cairo_int64_to_int32(a)    ((int32_t) _cairo_uint64_to_uint32(a))
 #define			_cairo_int64_add(a,b)	    _cairo_uint64_add (a,b)
 #define			_cairo_int64_sub(a,b)	    _cairo_uint64_sub (a,b)
@@ -111,6 +112,7 @@ int	       I	_cairo_int64_cmp (cairo_int64_t a, cairo_int64_t b);
 #define			_cairo_int64_to_uint64(i)   ((uint64_t) (i))
 
 #define			_cairo_int32_to_int64(i)    ((int64_t) (i))
+#define			_cairo_int32_to_uint64(i)   (_cairo_int64_to_uint64 (_cairo_int32_to_int64(i)))
 #define			_cairo_int64_to_int32(i)    ((int32_t) (i))
 #define			_cairo_int64_add(a,b)	    ((a) + (b))
 #define			_cairo_int64_sub(a,b)	    ((a) - (b))
@@ -169,6 +171,7 @@ cairo_uint128_t I	_cairo_uint128_add (cairo_uint128_t a, cairo_uint128_t b);
 cairo_uint128_t I	_cairo_uint128_sub (cairo_uint128_t a, cairo_uint128_t b);
 cairo_uint128_t I	_cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b);
 cairo_uint128_t I	_cairo_uint64x64_128_mul (cairo_uint64_t a, cairo_uint64_t b);
+cairo_uint128_t I	_cairo_uint64x32_128_mul (cairo_uint64_t a, uint32_t b);
 cairo_uint128_t I	_cairo_uint128_lsl (cairo_uint128_t a, int shift);
 cairo_uint128_t I	_cairo_uint128_rsl (cairo_uint128_t a, int shift);
 cairo_uint128_t I	_cairo_uint128_rsa (cairo_uint128_t a, int shift);
@@ -184,6 +187,7 @@ cairo_uint128_t I	_cairo_uint128_not (cairo_uint128_t a);
 #define			_cairo_int128_to_uint128(i)	(i)
 
 cairo_int128_t  I	_cairo_int32_to_int128 (int32_t i);
+#define			_cairo_int32_to_uint128(i)  (_cairo_int128_to_uint128 (_cairo_int32_to_int128 (i)))
 cairo_int128_t  I	_cairo_int64_to_int128 (cairo_int64_t i);
 #define			_cairo_int128_to_int64(a)   ((cairo_int64_t) (a).lo)
 #define			_cairo_int128_to_int32(a)   _cairo_int64_to_int32(_cairo_int128_to_int64(a))
@@ -191,7 +195,7 @@ cairo_int128_t  I	_cairo_int64_to_int128 (cairo_int64_t i);
 #define			_cairo_int128_sub(a,b)	    _cairo_uint128_sub(a,b)
 #define			_cairo_int128_mul(a,b)	    _cairo_uint128_mul(a,b)
 cairo_int128_t I _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b);
-#define                 _cairo_int64x32_128_mul(a, b) _cairo_int64x64_128_mul(a, _cairo_int32_to_int64(b))
+cairo_int128_t I _cairo_int64x32_128_mul (cairo_int64_t a, int32_t b);
 #define			_cairo_int128_lsl(a,b)	    _cairo_uint128_lsl(a,b)
 #define			_cairo_int128_rsl(a,b)	    _cairo_uint128_rsl(a,b)
 #define			_cairo_int128_rsa(a,b)	    _cairo_uint128_rsa(a,b)
@@ -213,6 +217,7 @@ int	        I	_cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b);
 #define			_cairo_uint128_sub(a,b)	    ((a) - (b))
 #define			_cairo_uint128_mul(a,b)	    ((a) * (b))
 #define			_cairo_uint64x64_128_mul(a,b)	((uint128_t) (a) * (b))
+#define			_cairo_uint64x32_128_mul(a,b)	((uint128_t) (a) * (b))
 #define			_cairo_uint128_lsl(a,b)	    ((a) << (b))
 #define			_cairo_uint128_rsl(a,b)	    ((uint128_t) (a) >> (b))
 #define			_cairo_uint128_rsa(a,b)	    ((uint128_t) ((int128_t) (a) >> (b)))
@@ -228,6 +233,7 @@ int	        I	_cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b);
 #define			_cairo_int128_to_uint128(i) ((uint128_t) (i))
 
 #define			_cairo_int32_to_int128(i)   ((int128_t) (i))
+#define			_cairo_int32_to_uint128(i)  (_cairo_int128_to_uint128 (_cairo_int32_to_int128 (i)))
 #define			_cairo_int64_to_int128(i)   ((int128_t) (i))
 #define			_cairo_int128_to_int64(i)   ((int64_t) (i))
 #define			_cairo_int128_to_int32(i)   ((int32_t) (i))
@@ -235,6 +241,7 @@ int	        I	_cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b);
 #define			_cairo_int128_sub(a,b)	    ((a) - (b))
 #define			_cairo_int128_mul(a,b)	    ((a) * (b))
 #define			_cairo_int64x64_128_mul(a,b) ((int128_t) (a) * (b))
+#define			_cairo_int64x32_128_mul(a,b) ((int128_t) (a) * (b))
 #define			_cairo_int128_lt(a,b)	    ((a) < (b))
 #define			_cairo_int128_cmp(a,b)	    ((a) == (b) ? 0 : (a) < (b) ? -1 : 1)
 #define			_cairo_int128_is_zero(a)    ((a) == 0)
diff --git a/src/cairo-wideint.c b/src/cairo-wideint.c
index 4565593..78d2c6d 100644
--- a/src/cairo-wideint.c
+++ b/src/cairo-wideint.c
@@ -511,6 +511,52 @@ _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b)
 }
 
 cairo_uint128_t
+_cairo_uint64x32_128_mul (cairo_uint64_t a, uint32_t b)
+{
+    cairo_uint128_t	s;
+    cairo_uint64_t	r0, r1, r2;
+
+    r0 = _cairo_uint32x32_64_mul (uint64_lo32 (a), b);	/* lo(a) * b */
+    r2 = _cairo_uint32x32_64_mul (uint64_hi32 (a), b);	/* hi(a) * b, weight 2^32 */
+
+    r1 = _cairo_uint64_add (uint64_hi (r0), r2);    /* cannot carry: hi(r0) + r2 < 2^64 */
+    s.hi = uint64_hi (r1);
+    if (_cairo_uint64_lt (r1, r2))		    /* defensive carry check */
+	s.hi = _cairo_uint64_add (s.hi, uint64_carry32);
+
+    s.lo = _cairo_uint64_add (uint64_shift32 (r1),
+			      uint64_lo (r0));
+
+    return s;
+}
+
+cairo_int128_t
+_cairo_int64x32_128_mul (cairo_int64_t a, int32_t b)
+{
+    cairo_int128_t s;
+
+    if (uint64_hi (a) == 0 || uint64_hi (a) == -1U) {	/* fast path: a fits in 32 bits */
+	return _cairo_int64_to_int128 (
+		_cairo_int32x32_64_mul (uint64_lo (a), b));
+    }
+
+    if (b > 0) {
+	s = _cairo_uint64x32_128_mul (_cairo_int64_to_uint64 (a),
+		                      (uint32_t) b);
+    } else if (b < 0) {
+	s = _cairo_uint64x64_128_mul (_cairo_int64_to_uint64 (a),
+		                      _cairo_int32_to_uint64 (b));	/* b sign-extended */
+	s.hi = _cairo_uint64_sub (s.hi, _cairo_int64_to_uint64 (a));	/* cancel a * 2^64 */
+    } else
+	return _cairo_int32_to_int128 (0);
+
+    if (_cairo_int64_negative (a))	/* (uint64_t) a == a + 2^64 */
+	s.hi = _cairo_uint64_sub (s.hi, _cairo_int32_to_uint64 (b));	/* cancel b * 2^64 */
+
+    return s;
+}
+
+cairo_uint128_t
 _cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b)
 {
     cairo_uint128_t	s;
-- 
1.5.6.3

