[PATCH] [wideint] Implement _cairo_int64x32_128_mul().
Chris Wilson
chris at chris-wilson.co.uk
Tue Oct 7 03:20:44 PDT 2008
By implementing a 64bit by 32bit multiply special case, we can
take advantage of the occasions when we can use a narrower multiply.
This case is interesting due to its heavy use within intersect_lines()
and edges_compare_x_for_y(), causing it to appear in profiles.
The implementation is about 2x faster during performance testing, simply
because most of the multiplicands fit within 32 bits and are
handled by the narrower 32x32_64_mul(). Overall it gives a performance
improvement of ~2%.
---
src/cairo-wideint-private.h | 9 +++++++-
src/cairo-wideint.c | 46 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 54 insertions(+), 1 deletions(-)
diff --git a/src/cairo-wideint-private.h b/src/cairo-wideint-private.h
index f5aac28..8a19e38 100644
--- a/src/cairo-wideint-private.h
+++ b/src/cairo-wideint-private.h
@@ -72,6 +72,7 @@ cairo_uint64_t I _cairo_uint64_not (cairo_uint64_t a);
#define _cairo_int64_to_uint64(i) (i)
cairo_int64_t I _cairo_int32_to_int64(int32_t i);
+#define _cairo_int32_to_uint64(i) (_cairo_int64_to_uint64 (_cairo_int32_to_int64(i)))
#define _cairo_int64_to_int32(a) ((int32_t) _cairo_uint64_to_uint32(a))
#define _cairo_int64_add(a,b) _cairo_uint64_add (a,b)
#define _cairo_int64_sub(a,b) _cairo_uint64_sub (a,b)
@@ -111,6 +112,7 @@ int I _cairo_int64_cmp (cairo_int64_t a, cairo_int64_t b);
#define _cairo_int64_to_uint64(i) ((uint64_t) (i))
#define _cairo_int32_to_int64(i) ((int64_t) (i))
+#define _cairo_int32_to_uint64(i) (_cairo_int64_to_uint64 (_cairo_int32_to_int64(i)))
#define _cairo_int64_to_int32(i) ((int32_t) (i))
#define _cairo_int64_add(a,b) ((a) + (b))
#define _cairo_int64_sub(a,b) ((a) - (b))
@@ -169,6 +171,7 @@ cairo_uint128_t I _cairo_uint128_add (cairo_uint128_t a, cairo_uint128_t b);
cairo_uint128_t I _cairo_uint128_sub (cairo_uint128_t a, cairo_uint128_t b);
cairo_uint128_t I _cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b);
cairo_uint128_t I _cairo_uint64x64_128_mul (cairo_uint64_t a, cairo_uint64_t b);
+cairo_int128_t I _cairo_uint64x32_128_mul (cairo_uint64_t a, uint32_t b);
cairo_uint128_t I _cairo_uint128_lsl (cairo_uint128_t a, int shift);
cairo_uint128_t I _cairo_uint128_rsl (cairo_uint128_t a, int shift);
cairo_uint128_t I _cairo_uint128_rsa (cairo_uint128_t a, int shift);
@@ -184,6 +187,7 @@ cairo_uint128_t I _cairo_uint128_not (cairo_uint128_t a);
#define _cairo_int128_to_uint128(i) (i)
cairo_int128_t I _cairo_int32_to_int128 (int32_t i);
+#define _cairo_int32_to_uint128(i) (_cairo_int128_to_uint128 (_cairo_int32_to_int128 (i)))
cairo_int128_t I _cairo_int64_to_int128 (cairo_int64_t i);
#define _cairo_int128_to_int64(a) ((cairo_int64_t) (a).lo)
#define _cairo_int128_to_int32(a) _cairo_int64_to_int32(_cairo_int128_to_int64(a))
@@ -191,7 +195,7 @@ cairo_int128_t I _cairo_int64_to_int128 (cairo_int64_t i);
#define _cairo_int128_sub(a,b) _cairo_uint128_sub(a,b)
#define _cairo_int128_mul(a,b) _cairo_uint128_mul(a,b)
cairo_int128_t I _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b);
-#define _cairo_int64x32_128_mul(a, b) _cairo_int64x64_128_mul(a, _cairo_int32_to_int64(b))
+cairo_int128_t I _cairo_int64x32_128_mul (cairo_int64_t a, int32_t b);
#define _cairo_int128_lsl(a,b) _cairo_uint128_lsl(a,b)
#define _cairo_int128_rsl(a,b) _cairo_uint128_rsl(a,b)
#define _cairo_int128_rsa(a,b) _cairo_uint128_rsa(a,b)
@@ -213,6 +217,7 @@ int I _cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b);
#define _cairo_uint128_sub(a,b) ((a) - (b))
#define _cairo_uint128_mul(a,b) ((a) * (b))
#define _cairo_uint64x64_128_mul(a,b) ((uint128_t) (a) * (b))
+#define _cairo_uint64x32_128_mul(a,b) ((uint128_t) (a) * (b))
#define _cairo_uint128_lsl(a,b) ((a) << (b))
#define _cairo_uint128_rsl(a,b) ((uint128_t) (a) >> (b))
#define _cairo_uint128_rsa(a,b) ((uint128_t) ((int128_t) (a) >> (b)))
@@ -228,6 +233,7 @@ int I _cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b);
#define _cairo_int128_to_uint128(i) ((uint128_t) (i))
#define _cairo_int32_to_int128(i) ((int128_t) (i))
+#define _cairo_int32_to_uint128(i) (_cairo_int128_to_uint128 (_cairo_int32_to_int128 (i)))
#define _cairo_int64_to_int128(i) ((int128_t) (i))
#define _cairo_int128_to_int64(i) ((int64_t) (i))
#define _cairo_int128_to_int32(i) ((int32_t) (i))
@@ -235,6 +241,7 @@ int I _cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b);
#define _cairo_int128_sub(a,b) ((a) - (b))
#define _cairo_int128_mul(a,b) ((a) * (b))
#define _cairo_int64x64_128_mul(a,b) ((int128_t) (a) * (b))
+#define _cairo_int64x32_128_mul(a,b) ((int128_t) (a) * (b))
#define _cairo_int128_lt(a,b) ((a) < (b))
#define _cairo_int128_cmp(a,b) ((a) == (b) ? 0 : (a) < (b) ? -1 : 1)
#define _cairo_int128_is_zero(a) ((a) == 0)
diff --git a/src/cairo-wideint.c b/src/cairo-wideint.c
index 4565593..78d2c6d 100644
--- a/src/cairo-wideint.c
+++ b/src/cairo-wideint.c
@@ -511,6 +511,52 @@ _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b)
}
cairo_uint128_t
+_cairo_uint64x32_128_mul (cairo_uint64_t a, uint32_t b)
+{
+ cairo_uint128_t s;
+ cairo_uint64_t r0, r1, r2;
+
+ r0 = _cairo_uint32x32_64_mul (uint64_lo32 (a), b);
+ r2 = _cairo_uint32x32_64_mul (uint64_hi32 (a), b);
+
+ r1 = _cairo_uint64_add (uint64_hi (r0), r2); /* this can carry */
+ s.hi = uint64_hi (r1);
+ if (_cairo_uint64_lt (r1, r2)) /* check */
+ s.hi = _cairo_uint64_add (s.hi, uint64_carry32);
+
+ s.lo = _cairo_uint64_add (uint64_shift32 (r1),
+ uint64_lo (r0));
+
+ return s;
+}
+
+cairo_int128_t
+_cairo_int64x32_128_mul (cairo_int64_t a, int32_t b)
+{
+ cairo_int128_t s;
+
+ if (uint64_hi (a) == 0 || uint64_hi (a) == -1U) {
+ return _cairo_int64_to_int128 (
+ _cairo_int32x32_64_mul (uint64_lo (a), b));
+ }
+
+ if (b > 0) {
+ s = _cairo_uint64x32_128_mul (_cairo_int64_to_uint64 (a),
+ (uint32_t) b);
+ } else if (b < 0) {
+ s = _cairo_uint64x64_128_mul (_cairo_int64_to_uint64 (a),
+ _cairo_int32_to_uint64 (b));
+ s.hi = _cairo_uint64_sub (s.hi, _cairo_int64_to_uint64 (a));
+ } else
+ return _cairo_int32_to_int128 (0);
+
+ if (_cairo_int64_negative (a))
+ s.hi = _cairo_uint64_sub (s.hi, _cairo_int32_to_uint64 (b));
+
+ return s;
+}
+
+cairo_uint128_t
_cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b)
{
cairo_uint128_t s;
--
1.5.6.3
--=-ZiiESaLDo1Qm+q2D8ALU
Content-Description:
Content-Disposition: inline; filename*0=0005--tessellator-Use-a-combsort-for-sorting-the-event-q.patc; filename*1=h
Content-Type: text/x-patch; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
More information about the cairo
mailing list