[Liboil-commit] liboil/i386_amd64 liboil/liboilclasses.h liboil/liboilfuncs-04.h liboil/liboilfuncs-doc.h liboil/liboilfuncs.h liboil/liboiltrampolines.c liboil/ref

David Schleef ds at kemper.freedesktop.org
Sat Sep 13 05:53:25 PDT 2008


 liboil/i386_amd64/sad8x8.c |   71 +++++++++++++++++++++++++++++++++++++++++++++
 liboil/liboilclasses.h     |    2 +
 liboil/liboilfuncs-04.h    |    2 +
 liboil/liboilfuncs-doc.h   |    2 +
 liboil/liboilfuncs.h       |    6 +++
 liboil/liboiltrampolines.c |   20 ++++++++++++
 liboil/ref/wavelet.c       |   49 +++++++++++++++++++++++++++++++
 7 files changed, 152 insertions(+)

New commits:
commit a28a9de744adfa810537fa5c5cdd6baef08e7919
Author: David Schleef <ds at schleef.org>
Date:   Sat Sep 13 05:52:49 2008 -0700

    Add avg2_32xn_u8

diff --git a/liboil/i386_amd64/sad8x8.c b/liboil/i386_amd64/sad8x8.c
index f72cbf5..fbc268e 100644
--- a/liboil/i386_amd64/sad8x8.c
+++ b/liboil/i386_amd64/sad8x8.c
@@ -617,6 +617,47 @@ combine4_16xn_u8_mmx (uint8_t *d, int ds1,
 OIL_DEFINE_IMPL_FULL (combine4_16xn_u8_mmx, combine4_16xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
 
 void
+combine4_32xn_u8_mmx (uint8_t *d, int ds1,  /* MMX/MMXEXT implementation of the combine4_32xn_u8 class; d is the destination row, ds1 its per-row byte step */
+    uint8_t *s1, int ss1,                   /* s1..s4 are the four source rows, each advanced by its own byte step after every row */
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)                   /* s5_6: six int16 coefficients (see the plain C reference implementation); n: number of rows */
+{
+  int j;
+
+  asm volatile ("\n"
+      "  pxor %%mm7, %%mm7\n"               /* mm7 = 0 */
+      "  movq 0(%0), %%mm6\n"               /* mm6 = s5_6[0..3], four 16-bit words */
+      "  movd 8(%0), %%mm4\n"               /* low 32 bits of mm4 = s5_6[4] and s5_6[5] */
+      "  pshufw $0x00, %%mm4, %%mm4\n"      /* replicate word 0 (s5_6[4]) into all four words; s5_6[5] is not kept in a register here */
+      ::"r" (s5_6) : "memory");             /* "memory": the asm reads *s5_6, which is not expressed as an operand */
+
+  for(j=0;j<n;j++){
+    asm volatile ("\n"
+        DO_4(0)                             /* DO_4 is defined outside this hunk; presumably handles 4 output bytes at the given offset - TODO confirm */
+        DO_4(4)
+        DO_4(8)
+        DO_4(12)
+        DO_4(16)
+        DO_4(20)
+        DO_4(24)
+        DO_4(28)                            /* eight invocations, offsets 0..28 in steps of 4 */
+
+        :
+        : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4) : "memory");  /* only pointers are passed, so memory accesses must be declared via the clobber */
+
+    s1 += ss1;                              /* step every pointer to the next row */
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+    d += ds1;
+  }
+  asm volatile ("emms");                    /* leave MMX state so x87 floating point code can run afterwards */
+}
+OIL_DEFINE_IMPL_FULL (combine4_32xn_u8_mmx, combine4_32xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+
+void
 combine2_12xn_u8_mmx (uint8_t *d, int ds1,
     uint8_t *s1, int ss1,
     uint8_t *s2, int ss2,
@@ -819,3 +860,33 @@ avg2_16xn_u8_mmx (uint8_t *d, int ds1, uint8_t *s1, int ss1,
 }
 OIL_DEFINE_IMPL_FULL (avg2_16xn_u8_mmx, avg2_16xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
 
+void  /* MMX/MMXEXT implementation of the avg2_32xn_u8 class: per-byte rounded average of two sources */
+avg2_32xn_u8_mmx (uint8_t *d, int ds1, uint8_t *s1, int ss1,  /* d: destination row, ds1: its per-row byte step; s1/ss1: first source */
+    uint8_t *s2, int ss2, int n)                               /* s2/ss2: second source; n: number of 32-byte rows */
+{
+  int j;
+  for(j=0;j<n;j++){
+    asm volatile ("\n"
+        "  movq 0(%[s1]), %%mm0\n"    /* load bytes 0-7 of s1 */
+        "  pavgb 0(%[s2]), %%mm0\n"   /* unsigned byte average with s2, rounded up: (a + b + 1) >> 1 */
+        "  movq %%mm0, 0(%[d])\n"     /* store bytes 0-7 of d */
+        "  movq 8(%[s1]), %%mm0\n"    /* same three steps for bytes 8-15 */
+        "  pavgb 8(%[s2]), %%mm0\n"
+        "  movq %%mm0, 8(%[d])\n"
+        "  movq 16(%[s1]), %%mm0\n"   /* bytes 16-23 */
+        "  pavgb 16(%[s2]), %%mm0\n"
+        "  movq %%mm0, 16(%[d])\n"
+        "  movq 24(%[s1]), %%mm0\n"   /* bytes 24-31 */
+        "  pavgb 24(%[s2]), %%mm0\n"
+        "  movq %%mm0, 24(%[d])\n"
+        :
+        : [d] "r" (d), [s1] "r" (s1), [s2] "r" (s2) : "memory");  /* "memory": the asm loads from s1/s2 and stores to d through plain pointer inputs */
+
+    s1 += ss1;                        /* advance all three pointers to the next row */
+    s2 += ss2;
+    d += ds1;
+  }
+  asm volatile ("emms");              /* leave MMX state so x87 floating point code can run afterwards */
+}
+OIL_DEFINE_IMPL_FULL (avg2_32xn_u8_mmx, avg2_32xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+
diff --git a/liboil/liboilclasses.h b/liboil/liboilclasses.h
index 8cfa0b2..3fc6e03 100644
--- a/liboil/liboilclasses.h
+++ b/liboil/liboilclasses.h
@@ -54,6 +54,7 @@ OIL_DECLARE_CLASS(argb_paint_u8);
 OIL_DECLARE_CLASS(average2_u8);
 OIL_DECLARE_CLASS(avg2_12xn_u8);
 OIL_DECLARE_CLASS(avg2_16xn_u8);
+OIL_DECLARE_CLASS(avg2_32xn_u8);
 OIL_DECLARE_CLASS(avg2_8xn_u8);
 OIL_DECLARE_CLASS(ayuv2argb_u8);
 OIL_DECLARE_CLASS(ayuv2uyvy);
@@ -129,6 +130,7 @@ OIL_DECLARE_CLASS(combine2_16xn_u8);
 OIL_DECLARE_CLASS(combine2_8xn_u8);
 OIL_DECLARE_CLASS(combine4_12xn_u8);
 OIL_DECLARE_CLASS(combine4_16xn_u8);
+OIL_DECLARE_CLASS(combine4_32xn_u8);
 OIL_DECLARE_CLASS(combine4_8xn_u8);
 OIL_DECLARE_CLASS(compare_u8);
 OIL_DECLARE_CLASS(composite_add_argb);
diff --git a/liboil/liboilfuncs-04.h b/liboil/liboilfuncs-04.h
index 39a07de..103c19e 100644
--- a/liboil/liboilfuncs-04.h
+++ b/liboil/liboilfuncs-04.h
@@ -54,6 +54,7 @@ void oil_argb_paint_u8 (uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s
 void oil_average2_u8 (uint8_t * d, int dstr, const uint8_t * s1, int sstr1, const uint8_t * s2, int sstr2, int n);
 void oil_avg2_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
 void oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
 void oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
 void oil_ayuv2argb_u8 (uint8_t * d_4xn, const uint8_t * s_4xn, int n);
 void oil_ayuv2uyvy (uint32_t * d_n, const uint32_t * s_n, int n);
@@ -129,6 +130,7 @@ void oil_combine2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, i
 void oil_combine2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const int16_t * s3_4, int n);
 void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
 void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n);
 void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
 void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
 void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
diff --git a/liboil/liboilfuncs-doc.h b/liboil/liboilfuncs-doc.h
index c5b3b9e..b914e19 100644
--- a/liboil/liboilfuncs-doc.h
+++ b/liboil/liboilfuncs-doc.h
@@ -18,6 +18,7 @@ void oil_argb_paint_u8 (uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s
 void oil_average2_u8 (uint8_t * d, int dstr, const uint8_t * s1, int sstr1, const uint8_t * s2, int sstr2, int n);
 void oil_avg2_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
 void oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
 void oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
 void oil_ayuv2argb_u8 (uint8_t * d_4xn, const uint8_t * s_4xn, int n);
 void oil_ayuv2uyvy (uint32_t * d_n, const uint32_t * s_n, int n);
@@ -93,6 +94,7 @@ void oil_combine2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, i
 void oil_combine2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const int16_t * s3_4, int n);
 void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
 void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n);
 void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
 void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
 void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h
index e326a77..cd03099 100644
--- a/liboil/liboilfuncs.h
+++ b/liboil/liboilfuncs.h
@@ -90,6 +90,9 @@ typedef void (*_oil_type_avg2_12xn_u8)(uint8_t * d_12xn, int ds1, const uint8_t
 OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_16xn_u8;
 typedef void (*_oil_type_avg2_16xn_u8)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
 #define oil_avg2_16xn_u8 ((_oil_type_avg2_16xn_u8)(*(void **)oil_function_class_ptr_avg2_16xn_u8))
+OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_32xn_u8;
+typedef void (*_oil_type_avg2_32xn_u8)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
+#define oil_avg2_32xn_u8 ((_oil_type_avg2_32xn_u8)(*(void **)oil_function_class_ptr_avg2_32xn_u8))
 OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_8xn_u8;
 typedef void (*_oil_type_avg2_8xn_u8)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
 #define oil_avg2_8xn_u8 ((_oil_type_avg2_8xn_u8)(*(void **)oil_function_class_ptr_avg2_8xn_u8))
@@ -315,6 +318,9 @@ typedef void (*_oil_type_combine4_12xn_u8)(uint8_t * d_12xn, int ds1, const uint
 OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_16xn_u8;
 typedef void (*_oil_type_combine4_16xn_u8)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
 #define oil_combine4_16xn_u8 ((_oil_type_combine4_16xn_u8)(*(void **)oil_function_class_ptr_combine4_16xn_u8))
+OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_32xn_u8;
+typedef void (*_oil_type_combine4_32xn_u8)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n);
+#define oil_combine4_32xn_u8 ((_oil_type_combine4_32xn_u8)(*(void **)oil_function_class_ptr_combine4_32xn_u8))
 OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_8xn_u8;
 typedef void (*_oil_type_combine4_8xn_u8)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
 #define oil_combine4_8xn_u8 ((_oil_type_combine4_8xn_u8)(*(void **)oil_function_class_ptr_combine4_8xn_u8))
diff --git a/liboil/liboiltrampolines.c b/liboil/liboiltrampolines.c
index 9009dff..7096766 100644
--- a/liboil/liboiltrampolines.c
+++ b/liboil/liboiltrampolines.c
@@ -211,6 +211,16 @@ oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, c
   ((void (*)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n))(_oil_function_class_avg2_16xn_u8.func))(d_16xn, ds1, s1_16xn, ss1, s2_16xn, ss2, n);
 }
 
+#undef oil_avg2_32xn_u8  /* liboilfuncs.h defines this name as a macro; drop it so a real function can be defined */
+void  /* Out-of-line entry point: forwards all arguments to the class's current .func pointer. */
+oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n)
+{
+  typedef void (*avg2_32xn_u8_fn) (uint8_t *, int, const uint8_t *, int, const uint8_t *, int, int);  /* call signature of the class */
+  if (!_oil_function_class_avg2_32xn_u8.func)  /* nothing installed yet */
+    oil_class_optimize (&_oil_function_class_avg2_32xn_u8);  /* presumably selects an implementation and sets .func - TODO confirm */
+  ((avg2_32xn_u8_fn) _oil_function_class_avg2_32xn_u8.func) (d_32xn, ds1, s1_32xn, ss1, s2_32xn, ss2, n);
+}
+
 #undef oil_avg2_8xn_u8
 void
 oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n)
@@ -961,6 +971,16 @@ oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss
   ((void (*)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_16xn_u8.func))(d_16xn, ds1, s1_16xn, ss1, s2_16xn, ss2, s3_16xn, ss3, s4_16xn, ss4, s5_6, n);
 }
 
+#undef oil_combine4_32xn_u8  /* liboilfuncs.h defines this name as a macro; drop it so a real function can be defined */
+void  /* Out-of-line entry point: forwards all arguments to the class's current .func pointer. */
+oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n)
+{
+  if (_oil_function_class_combine4_32xn_u8.func == NULL) {  /* nothing installed yet */
+    oil_class_optimize (&_oil_function_class_combine4_32xn_u8);  /* presumably selects an implementation and sets .func - TODO confirm */
+  }
+  ((void (*)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_32xn_u8.func))(d_32xn, ds1, s1_32xn, ss1, s2_32xn, ss2, s3_32xn, ss3, s4_32xn, ss4, s5_6, n);
+}
+
 #undef oil_combine4_8xn_u8
 void
 oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n)
diff --git a/liboil/ref/wavelet.c b/liboil/ref/wavelet.c
index 77f5e54..7295545 100644
--- a/liboil/ref/wavelet.c
+++ b/liboil/ref/wavelet.c
@@ -149,6 +149,9 @@ OIL_DEFINE_CLASS_FULL (combine4_12xn_u8, "uint8_t *d_12xn, int ds1, "
 OIL_DEFINE_CLASS_FULL (combine4_16xn_u8, "uint8_t *d_16xn, int ds1, "
     "uint8_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, uint8_t *s3_16xn, "
     "int ss3, uint8_t *s4_16xn, int ss4, int16_t *s5_6, int n", combine4_test);
+OIL_DEFINE_CLASS_FULL (combine4_32xn_u8, "uint8_t *d_32xn, int ds1, "
+    "uint8_t *s1_32xn, int ss1, uint8_t *s2_32xn, int ss2, uint8_t *s3_32xn, "
+    "int ss3, uint8_t *s4_32xn, int ss4, int16_t *s5_6, int n", combine4_test);
 OIL_DEFINE_CLASS_FULL (add2_rshift_add_s16, "int16_t *d, int16_t *s1, "
     "int16_t *s2, int16_t *s3, int16_t *s4_2, int n", add2_test);
 OIL_DEFINE_CLASS_FULL (add2_rshift_sub_s16, "int16_t *d, int16_t *s1, "
@@ -159,6 +162,8 @@ OIL_DEFINE_CLASS (avg2_12xn_u8, "uint8_t *d_12xn, int ds1, "
     "uint8_t *s1_12xn, int ss1, uint8_t *s2_12xn, int ss2, int n");
 OIL_DEFINE_CLASS (avg2_16xn_u8, "uint8_t *d_16xn, int ds1, "
     "uint8_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, int n");
+OIL_DEFINE_CLASS (avg2_32xn_u8, "uint8_t *d_32xn, int ds1, "
+    "uint8_t *s1_32xn, int ss1, uint8_t *s2_32xn, int ss2, int n");
 
 void
 deinterleave_ref (int16_t *d_2xn, int16_t *s_2xn, int n)
@@ -791,6 +796,34 @@ combine4_16xn_u8_ref (uint8_t *d, int ds1,
 OIL_DEFINE_IMPL_REF (combine4_16xn_u8_ref, combine4_16xn_u8);
 
 void
+combine4_32xn_u8_ref (uint8_t *d, int ds1,  /* plain C reference implementation of the combine4_32xn_u8 class */
+    uint8_t *s1, int ss1,                   /* s1..s4: source rows, each with its own per-row byte step */
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)                   /* s5_6[0..3]: weights, s5_6[4]: added offset, s5_6[5]: right-shift count; n: rows */
+{
+  int row;
+  int col;
+  for (row = 0; row < n; row++) {
+    for (col = 0; col < 32; col++) {
+      /* weighted sum of the four source bytes in this column, accumulated in int */
+      int acc = s5_6[0] * s1[col]
+          + s5_6[1] * s2[col]
+          + s5_6[2] * s3[col]
+          + s5_6[3] * s4[col];
+      d[col] = (acc + s5_6[4]) >> s5_6[5];  /* add offset, shift down; the store to uint8_t keeps the low 8 bits (no clamping) */
+    }
+    d += ds1;                               /* move all five pointers to the next row */
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_32xn_u8_ref, combine4_32xn_u8);
+
+void
 combine2_8xn_u8_ref (uint8_t *d, int ds1,
     uint8_t *s1, int ss1,
     uint8_t *s2, int ss2,
@@ -929,3 +962,19 @@ avg2_16xn_u8_ref (uint8_t *d, int ds1, uint8_t *s1, int ss1,
 }
 OIL_DEFINE_IMPL_REF (avg2_16xn_u8_ref, avg2_16xn_u8);
 
+void  /* plain C reference implementation of the avg2_32xn_u8 class */
+avg2_32xn_u8_ref (uint8_t *d, int ds1, uint8_t *s1, int ss1,  /* d/ds1: destination row and its per-row byte step; s1/ss1: first source */
+    uint8_t *s2, int ss2, int n)                               /* s2/ss2: second source; n: number of 32-byte rows */
+{
+  int rows_left;  /* rows still to be written */
+  int col;        /* byte index inside the current row */
+  for (rows_left = n; rows_left > 0; rows_left--) {
+    for (col = 0; col < 32; col++) {
+      d[col] = (uint8_t) ((s1[col] + s2[col] + 1) / 2);  /* average in int; the +1 rounds halves upward */
+    }
+    d += ds1;     /* move all three pointers to the next row */
+    s1 += ss1;
+    s2 += ss2;
+  }
+}
+OIL_DEFINE_IMPL_REF (avg2_32xn_u8_ref, avg2_32xn_u8);


More information about the Liboil-commit mailing list