[gst-cvs] gstreamer: utils: optimize for x86_64 with some inline asm
Wim Taymans
wtay at kemper.freedesktop.org
Fri Aug 28 03:46:04 PDT 2009
Module: gstreamer
Branch: master
Commit: d8911f269faa235572b80a8a25b842b09df6e151
URL: http://cgit.freedesktop.org/gstreamer/gstreamer/commit/?id=d8911f269faa235572b80a8a25b842b09df6e151
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Fri Aug 28 12:43:43 2009 +0200
utils: optimize for x86_64 with some inline asm
64bit x86 has native 64x64->128 bit multiply that we can use with some inline
assembler to speed up large multiplications.
Use bsr to find the number of leading zeros more efficiently.
---
gst/gstutils.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/gst/gstutils.c b/gst/gstutils.c
index fddf871..fa78e11 100644
--- a/gst/gstutils.c
+++ b/gst/gstutils.c
@@ -204,6 +204,16 @@ typedef union
} l;
} GstUInt64;
+#if defined (__x86_64__) && defined (__GNUC__)
+/* multiply two 64-bit unsigned ints into a 128-bit unsigned int using the
+ * x86_64 one-operand MUL, which multiplies rax by its operand and leaves the
+ * product in rdx:rax. the high and low 64 bits of the product are placed in
+ * c1 and c0 respectively. this operation cannot overflow. */
+static void
+gst_util_uint64_mul_uint64 (GstUInt64 * c1, GstUInt64 * c0, guint64 arg1,
+    guint64 arg2)
+{
+  /* MUL has no immediate form, so the multiplier must be constrained to a
+   * register or memory ("rm"); "g" would also allow an immediate and produce
+   * unassemblable code when arg2 is a known constant. the explicit q suffix
+   * pins the operand size when a memory operand is chosen. */
+  __asm__ __volatile__ ("mulq %3":"=a" (c0->ll), "=d" (c1->ll)
+      :"a" (arg1), "rm" (arg2)
+      :"cc");
+}
+#else /* defined (__x86_64__) */
/* multiply two 64-bit unsigned ints into a 128-bit unsigned int. the high
* and low 64 bits of the product are placed in c1 and c0 respectively.
* this operation cannot overflow. */
@@ -246,8 +256,21 @@ gst_util_uint64_mul_uint64 (GstUInt64 * c1, GstUInt64 * c0, guint64 arg1,
* the high words of a1 and b0 to b1, the result is c1. */
c1->ll = (guint64) v.l.high * n.l.high + c1->l.high + a1.l.high + b0.l.high;
}
+#endif /* defined (__x86_64__) */
/* count leading zeros */
+#if defined (__x86_64__) && defined (__GNUC__)
+/* count the leading zero bits of a 32-bit value using the x86 BSR
+ * instruction. returns 32 when val is 0. */
+static guint
+gst_util_clz (guint32 val)
+{
+  guint s;
+
+  /* BSR leaves its destination undefined when the source is 0, so that
+   * input has to be handled before reaching the asm. */
+  if (val == 0)
+    return 32;
+
+  /* BSR yields the index (0..31) of the highest set bit; xor-ing that index
+   * with 31 converts it into the number of leading zeros. */
+  __asm__ __volatile__ ("bsrl %0, %0 \n\t"
+      "xorl $31, %0 \n\t":"=r" (s):"0" (val):"cc");
+  return s;
+}
+#else /* defined (__x86_64__) */
static guint
gst_util_clz (guint32 val)
{
@@ -266,6 +289,7 @@ gst_util_clz (guint32 val)
return s;
}
+#endif /* defined (__x86_64__) */
/* based on Hacker's Delight p152 */
static guint64
@@ -329,6 +353,21 @@ gst_util_div128_64 (GstUInt64 c1, GstUInt64 c0, guint64 denom)
/* multiply a 64-bit unsigned int by a 32-bit unsigned int into a 96-bit
* unsigned int. the high 64 bits and low 32 bits of the product are
* placed in c1 and c0 respectively. this operation cannot overflow. */
+#if defined (__x86_64__) && defined (__GNUC__)
+/* x86_64 variant: one 64x64->128 bit MUL, then repack the 128-bit product
+ * (hi:lo) as a 96-bit value with the high 64 bits in c1 and the low 32 bits
+ * in c0. */
+static void
+gst_util_uint64_mul_uint32 (GstUInt64 * c1, GstUInt64 * c0, guint64 arg1,
+    guint32 arg2)
+{
+  guint64 lo, hi;
+
+  /* only the multiply is done in asm. every register it writes (rax, rdx)
+   * is declared as an output and no input-only register is modified, so the
+   * compiler's view of the register state stays correct. */
+  __asm__ __volatile__ ("mulq %3":"=a" (lo), "=d" (hi)
+      :"a" (arg1), "rm" ((guint64) arg2)
+      :"cc");
+
+  /* the product of a 64-bit and a 32-bit value is below 2^96, so hi is below
+   * 2^32 and the shift loses nothing. */
+  c1->ll = (hi << 32) | (lo >> 32);
+  /* keep only the low 32 bits; the conversion back to 64 bits zero-extends */
+  c0->ll = (guint32) lo;
+}
+#else /* defined (__x86_64__) */
static void
gst_util_uint64_mul_uint32 (GstUInt64 * c1, GstUInt64 * c0, guint64 arg1,
guint32 arg2)
@@ -341,6 +380,7 @@ gst_util_uint64_mul_uint32 (GstUInt64 * c1, GstUInt64 * c0, guint64 arg1,
c1->ll = (guint64) a.l.high * arg2 + c0->l.high;
c0->l.high = 0;
}
+#endif /* defined (__x86_64__) */
/* divide a 96-bit unsigned int by a 32-bit unsigned int when we know the
* quotient fits into 64 bits. the high 64 bits and low 32 bits of the
More information about the Gstreamer-commits
mailing list