xserver/fb fbcompose.c, 1.30, 1.31 fbmmx.c, 1.2, 1.3 fbmmx.h, 1.1,
1.2 fbpict.c, 1.38, 1.39 fbpict.h, 1.23, 1.24
Lars Knoll
xserver-commit at pdx.freedesktop.org
Tue Jul 12 19:57:02 EST 2005
Committed by: lars
Update of /cvs/xserver/xserver/fb
In directory gabe:/tmp/cvs-serv16642/fb
Modified Files:
fbcompose.c fbmmx.c fbmmx.h fbpict.c fbpict.h
Log Message:
Add MMX Code paths for the basic composition operations
in fbComposeGeneral.
Check for -msse in configure, as pshufw needs this flag working.
Index: fbcompose.c
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbcompose.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -d -r1.30 -r1.31
--- fbcompose.c 4 Jul 2005 14:43:21 -0000 1.30
+++ fbcompose.c 12 Jul 2005 09:57:00 -0000 1.31
@@ -125,10 +125,12 @@
const CARD16 *end = pixel + width;
while (pixel < end) {
CARD32 p = *pixel++;
- *buffer++ = 0xff000000 |
- ((((p) << 3) & 0xf8) | (((p) >> 2) & 0x7)) |
- ((((p) << 5) & 0xfc00) | (((p) >> 1) & 0x300)) |
- ((((p) << 8) & 0xf80000) | (((p) << 3) & 0x70000));
+ CARD32 r = (((p) << 3) & 0xf8) |
+ (((p) << 5) & 0xfc00) |
+ (((p) << 8) & 0xf80000);
+ r |= (r >> 5) & 0x70007;
+ r |= (r >> 6) & 0x300;
+ *buffer++ = 0xff000000 | r;
}
}
@@ -485,7 +487,7 @@
a |= a << 2;
a |= a << 4;
*buffer++ = a << 24;
- }
+ }
}
static FASTCALL void
@@ -1424,7 +1426,6 @@
/*
* Combine src and mask
*/
-
static FASTCALL void
fbCombineMaskU (CARD32 *src, const CARD32 *mask, int width)
{
@@ -1437,13 +1438,9 @@
}
}
-
-
/*
* All of the composing functions
*/
-typedef FASTCALL void (*CombineFuncU) (CARD32 *dest, const CARD32 *src, int width);
-
static FASTCALL void
fbCombineClear (CARD32 *dest, const CARD32 *src, int width)
@@ -1603,7 +1600,7 @@
da = ~d >> 24;
if (sa > da)
{
- sa = (da << 8) / sa;
+ sa = FbIntDiv(da, sa);
FbByteMul(s, sa);
}
FbByteAdd(d, s);
@@ -1922,7 +1919,7 @@
fbCombineConjointGeneralU (dest, src, width, CombineXor);
}
-static CombineFuncU combineFuncU[] = {
+static CombineFuncU fbCombineFuncU[] = {
fbCombineClear,
fbCombineSrcU,
0, /* CombineDst */
@@ -1969,8 +1966,6 @@
fbCombineConjointXorU,
};
-
-
static FASTCALL void
fbCombineMaskC (CARD32 *src, CARD32 *mask, int width)
{
@@ -2057,8 +2052,6 @@
}
}
-typedef FASTCALL void (*CombineFuncC) (CARD32 *dest, CARD32 *src, CARD32 *mask, int width);
-
static FASTCALL void
fbCombineClearC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
@@ -2547,7 +2540,7 @@
fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
}
-static CombineFuncC combineFuncC[] = {
+static CombineFuncC fbCombineFuncC[] = {
fbCombineClearC,
fbCombineSrcC,
0, /* Dest */
@@ -2594,6 +2587,14 @@
fbCombineConjointXorC,
};
+
+FbComposeFunctions composeFunctions = {
+ fbCombineFuncU,
+ fbCombineFuncC,
+ fbCombineMaskU
+};
+
+
static void fbFetchSolid(PicturePtr pict, int x, int y, int width, CARD32 *buffer)
{
FbBits *bits;
@@ -3332,7 +3333,7 @@
if (fetchSrc && fetchMask && data->mask && data->mask->componentAlpha && PICT_FORMAT_RGB(data->mask->format)) {
CARD32 *mask_buffer = dest_buffer + data->width;
- CombineFuncC compose = combineFuncC[data->op];
+ CombineFuncC compose = composeFunctions.combineC[data->op];
if (!compose)
return;
@@ -3354,7 +3355,7 @@
}
} else {
- CombineFuncU compose = combineFuncU[data->op];
+ CombineFuncU compose = composeFunctions.combineU[data->op];
if (!compose)
return;
@@ -3362,7 +3363,7 @@
fetchSrc(data->src, data->xSrc, data->ySrc, data->width, src_buffer);
if (fetchMask) {
fetchMask(data->mask, data->xMask, data->yMask, data->width, dest_buffer);
- fbCombineMaskU(src_buffer, dest_buffer, data->width);
+ composeFunctions.combineMaskU(src_buffer, dest_buffer, data->width);
}
fetchSrc = 0;
fetchMask = 0;
@@ -3376,7 +3377,7 @@
/* add in mask */
if (fetchMask) {
fetchMask(data->mask, data->xMask, data->yMask + i, data->width, dest_buffer);
- fbCombineMaskU(src_buffer, dest_buffer, data->width);
+ composeFunctions.combineMaskU(src_buffer, dest_buffer, data->width);
}
}
@@ -3416,7 +3417,7 @@
CARD32 _scanline_buffer[SCANLINE_BUFFER_LENGTH*3];
CARD32 *scanline_buffer = _scanline_buffer;
FbComposeData compose_data;
-
+
if (pSrc->pDrawable)
srcRepeat = pSrc->repeat == RepeatNormal && !pSrc->transform
&& (pSrc->pDrawable->width != 1 || pSrc->pDrawable->height != 1);
Index: fbmmx.c
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbmmx.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- fbmmx.c 18 Apr 2005 22:54:21 -0000 1.2
+++ fbmmx.c 12 Jul 2005 09:57:00 -0000 1.3
@@ -1,6 +1,7 @@
/*
* Copyright © 2004 Red Hat, Inc.
* Copyright © 2004 Nicholas Miell
+ * Copyright © 2005 Trolltech AS
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
@@ -12,15 +13,18 @@
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
- * RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
*
* Author: Søren Sandmann (sandmann at redhat.com)
* Minor Improvements: Nicholas Miell (nmiell at gmail.com)
+ * MMX code paths for fbcompose.c by Lars Knoll (lars at trolltech.com)
*
* Based on work by Owen Taylor
*/
@@ -34,14 +38,7 @@
#include "fbmmx.h"
#include <mmintrin.h>
-
-#if !defined(USE_SSE) && defined(__amd64__)
-#define USE_SSE
-#endif
-
-#ifdef USE_SSE
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
-#endif
#ifdef RENDER
@@ -49,8 +46,6 @@
#include "mipict.h"
#include "fbpict.h"
-typedef unsigned long long ullong;
-
#define noVERBOSE
#ifdef VERBOSE
@@ -59,6 +54,581 @@
#define CHECKPOINT()
#endif
+/* --------------- MMX code paths for fbcompose.c --------------------- */
+
+static FASTCALL void
+mmxCombineMaskU (CARD32 *src, const CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+
+ const CARD32 *end = mask + width;
+ while (mask < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ a = MmxAlpha(a);
+ MmxMul(s, a);
+ *src = MmxFrom(s);
+ ++src;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+
+static FASTCALL void
+mmxCombineOverU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = dest + width;
+
+ while (dest < end) {
+ __m64 x, y, a;
+ x = MmxTo(*src);
+ y = MmxTo(*dest);
+ a = MmxAlpha(x);
+ a = MmxNegate(a);
+ MmxMulAdd(y, a, x);
+ *dest = MmxFrom(y);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineOverReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = dest + width;
+
+ while (dest < end) {
+ __m64 x, y, a;
+ x = MmxTo(*dest);
+ y = MmxTo(*src);
+ a = MmxAlpha(x);
+ a = MmxNegate(a);
+ MmxMulAdd(y, a, x);
+ *dest = MmxFrom(y);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineInU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+
+ const CARD32 *end = dest + width;
+
+ while (dest < end) {
+ __m64 x, a;
+ x = MmxTo(*src);
+ a = MmxTo(*dest);
+ a = MmxAlpha(a);
+ MmxMul(x, a);
+ *dest = MmxFrom(x);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineInReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+
+ const CARD32 *end = dest + width;
+
+ while (dest < end) {
+ __m64 x, a;
+ x = MmxTo(*dest);
+ a = MmxTo(*src);
+ a = MmxAlpha(a);
+ MmxMul(x, a);
+ *dest = MmxFrom(x);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineOutU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = dest + width;
+
+ while (dest < end) {
+ __m64 x, a;
+ x = MmxTo(*src);
+ a = MmxTo(*dest);
+ a = MmxAlpha(a);
+ a = MmxNegate(a);
+ MmxMul(x, a);
+ *dest = MmxFrom(x);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineOutReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = dest + width;
+
+ while (dest < end) {
+ __m64 x, a;
+ x = MmxTo(*dest);
+ a = MmxTo(*src);
+ a = MmxAlpha(a);
+ a = MmxNegate(a);
+ MmxMul(x, a);
+ *dest = MmxFrom(x);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineAtopU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = dest + width;
+
+ while (dest < end) {
+ __m64 s, da, d, sia;
+ s = MmxTo(*src);
+ d = MmxTo(*dest);
+ sia = MmxAlpha(s);
+ sia = MmxNegate(sia);
+ da = MmxAlpha(d);
+ MmxAddMul(s, da, d, sia);
+ *dest = MmxFrom(s);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineAtopReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end;
+
+ end = dest + width;
+
+ while (dest < end) {
+ __m64 s, dia, d, sa;
+ s = MmxTo(*src);
+ d = MmxTo(*dest);
+ sa = MmxAlpha(s);
+ dia = MmxAlpha(d);
+ dia = MmxNegate(dia);
+ MmxAddMul(s, dia, d, sa);
+ *dest = MmxFrom(s);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineXorU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = dest + width;
+
+ while (dest < end) {
+ __m64 s, dia, d, sia;
+ s = MmxTo(*src);
+ d = MmxTo(*dest);
+ sia = MmxAlpha(s);
+ dia = MmxAlpha(d);
+ sia = MmxNegate(sia);
+ dia = MmxNegate(dia);
+ MmxAddMul(s, dia, d, sia);
+ *dest = MmxFrom(s);
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineAddU (CARD32 *dest, const CARD32 *src, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+
+ const CARD32 *end = dest + width;
+ while (dest < end) {
+ __m64 s, d;
+ s = MmxTo(*src);
+ d = MmxTo(*dest);
+ *dest = MmxFrom(MmxAdd(s, d));
+ ++dest;
+ ++src;
+ }
+ _mm_empty();
+}
+
+/*
+ * MMX variant of the unified-alpha SATURATE combiner (installed by
+ * fbComposeSetupMMX as combineU[PictOpSaturate]).
+ *
+ * For each pixel the source is added to the destination.  When the source
+ * alpha (s >> 24) exceeds the inverted destination alpha (~d >> 24), the
+ * source is first scaled by FbIntDiv(da, sa), mirroring the
+ * FbIntDiv()/FbByteMul()/FbByteAdd() sequence of the C path in fbcompose.c.
+ *
+ * dest  - destination scanline, updated in place (width pixels)
+ * src   - source scanline (width pixels)
+ * width - number of pixels to combine
+ */
+static FASTCALL void
+mmxCombineSaturateU (CARD32 *dest, const CARD32 *src, int width)
+{
+    /* Presumably referenced by the Mmx* helper macros (defined outside this view). */
+    const __m64 mmx_0 = _mm_setzero_si64();
+    const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+
+    const CARD32 *end = dest + width;
+    while (dest < end) {
+        CARD32 s = *src;
+        CARD32 d = *dest;
+        __m64 ms = MmxTo(s);
+        __m64 md = MmxTo(d);
+        CARD32 sa = s >> 24;
+        CARD32 da = ~d >> 24;
+
+        if (sa > da) {
+            /* sa > da >= 0 here, so the divisor passed to FbIntDiv() is non-zero. */
+            /* NOTE(review): confirm the FbIntDiv() result sits in the lane MmxAlpha() broadcasts. */
+            __m64 msa = MmxTo(FbIntDiv(da, sa));
+            msa = MmxAlpha(msa);
+            MmxMul(ms, msa);
+        }
+        /* MmxAdd() is used as a value-returning expression elsewhere in this
+         * file, so the sum has to be stored back or dest is left unchanged. */
+        md = MmxAdd(md, ms);
+        *dest = MmxFrom(md);
+        ++src;
+        ++dest;
+    }
+    _mm_empty();
+}
+
+
+static FASTCALL void
+mmxCombineSrcC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ MmxMul(s, a);
+ *dest = MmxFrom(s);
+ ++src;
+ ++mask;
+ ++dest;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineOverC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ __m64 sa = MmxAlpha(s);
+ MmxMul(s, a);
+ MmxMul(a, sa);
+ a = MmxNegate(a);
+ MmxMulAdd(d, a, s);
+ *dest = MmxFrom(d);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineOverReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ __m64 da = MmxAlpha(d);
+ da = MmxNegate(da);
+ MmxMul(s, a);
+ MmxMulAdd(s, da, d);
+ *dest = MmxFrom(s);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+
+static FASTCALL void
+mmxCombineInC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ __m64 da = MmxAlpha(d);
+ MmxMul(s, a);
+ MmxMul(s, da);
+ *dest = MmxFrom(s);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineInReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ __m64 sa = MmxAlpha(s);
+ MmxMul(a, sa);
+ MmxMul(d, a);
+ *dest = MmxFrom(d);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineOutC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ __m64 da = MmxAlpha(d);
+ da = MmxNegate(da);
+ MmxMul(s, a);
+ MmxMul(s, da);
+ *dest = MmxFrom(s);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineOutReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ __m64 sa = MmxAlpha(s);
+ MmxMul(a, sa);
+ a = MmxNegate(a);
+ MmxMul(d, a);
+ *dest = MmxFrom(d);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineAtopC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ __m64 da = MmxAlpha(d);
+ __m64 sa = MmxAlpha(s);
+ MmxMul(s, a);
+ MmxMul(a, sa);
+ a = MmxNegate(a);
+ MmxAddMul(d, a, s, da);
+ *dest = MmxFrom(d);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+/*
+ * MMX variant of the component-alpha ATOP_REVERSE combiner (installed by
+ * fbComposeSetupMMX as combineC[PictOpAtopReverse]).
+ *
+ * Per pixel: the source is multiplied by the mask, the mask is multiplied by
+ * the source alpha, the destination alpha is negated, and the four values are
+ * combined with MmxAddMul() into the destination.
+ * NOTE(review): the Mmx* helper macros are defined outside this view; from
+ * their use here the result is presumably
+ *     dest = dest * (mask * src.alpha) + (src * mask) * (1 - dest.alpha)
+ * -- confirm against fbmmx.h.
+ *
+ * dest  - destination scanline, updated in place (width pixels)
+ * src   - source scanline (width pixels)
+ * mask  - component-alpha mask scanline (width pixels)
+ * width - number of pixels to combine
+ */
+static FASTCALL void
+mmxCombineAtopReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    /* Presumably referenced by the Mmx* helper macros (defined outside this view). */
+    const __m64 mmx_0 = _mm_setzero_si64();
+    const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+    const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+    const CARD32 *end = src + width;
+    while (src < end) {
+        __m64 a = MmxTo(*mask);
+        __m64 s = MmxTo(*src);
+        __m64 d = MmxTo(*dest);
+        __m64 da = MmxAlpha(d);
+        __m64 sa = MmxAlpha(s);   /* terminating ';' was missing here */
+        MmxMul(s, a);
+        MmxMul(a, sa);
+        da = MmxNegate(da);
+        MmxAddMul(d, a, s, da);
+        *dest = MmxFrom(d);
+        ++src;
+        ++dest;
+        ++mask;
+    }
+    _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineXorC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+ const __m64 mmx_4x00ff = (__m64) 0x00ff00ff00ff00ffULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ __m64 da = MmxAlpha(d);
+ __m64 sa = MmxAlpha(s);
+ MmxMul(s, a);
+ MmxMul(a, sa);
+ da = MmxNegate(da);
+ a = MmxNegate(a);
+ MmxAddMul(d, a, s, da);
+ *dest = MmxFrom(d);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+static FASTCALL void
+mmxCombineAddC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ const __m64 mmx_0 = _mm_setzero_si64();
+ const __m64 mmx_4x0080 = (__m64) 0x0080008000800080ULL;
+
+ const CARD32 *end = src + width;
+ while (src < end) {
+ __m64 a = MmxTo(*mask);
+ __m64 s = MmxTo(*src);
+ __m64 d = MmxTo(*dest);
+ MmxMul(s, a);
+ d = MmxAdd(s, d);
+ *dest = MmxFrom(d);
+ ++src;
+ ++dest;
+ ++mask;
+ }
+ _mm_empty();
+}
+
+extern FbComposeFunctions composeFunctions;
+
+void fbComposeSetupMMX(void)
+{
+ /* check if we have MMX support and initialize accordingly */
+ if (fbHaveMMX()) {
+ composeFunctions.combineU[PictOpOver] = mmxCombineOverU;
+ composeFunctions.combineU[PictOpOverReverse] = mmxCombineOverReverseU;
+ composeFunctions.combineU[PictOpIn] = mmxCombineInU;
+ composeFunctions.combineU[PictOpInReverse] = mmxCombineInReverseU;
+ composeFunctions.combineU[PictOpOut] = mmxCombineOutU;
+ composeFunctions.combineU[PictOpOutReverse] = mmxCombineOutReverseU;
+ composeFunctions.combineU[PictOpAtop] = mmxCombineAtopU;
+ composeFunctions.combineU[PictOpAtopReverse] = mmxCombineAtopReverseU;
+ composeFunctions.combineU[PictOpXor] = mmxCombineXorU;
+ composeFunctions.combineU[PictOpAdd] = mmxCombineAddU;
+ composeFunctions.combineU[PictOpSaturate] = mmxCombineSaturateU;
+
+ composeFunctions.combineC[PictOpSrc] = mmxCombineSrcC;
+ composeFunctions.combineC[PictOpOver] = mmxCombineOverC;
+ composeFunctions.combineC[PictOpOverReverse] = mmxCombineOverReverseC;
+ composeFunctions.combineC[PictOpIn] = mmxCombineInC;
+ composeFunctions.combineC[PictOpInReverse] = mmxCombineInReverseC;
+ composeFunctions.combineC[PictOpOut] = mmxCombineOutC;
+ composeFunctions.combineC[PictOpOutReverse] = mmxCombineOutReverseC;
+ composeFunctions.combineC[PictOpAtop] = mmxCombineAtopC;
+ composeFunctions.combineC[PictOpAtopReverse] = mmxCombineAtopReverseC;
+ composeFunctions.combineC[PictOpXor] = mmxCombineXorC;
+ composeFunctions.combineC[PictOpAdd] = mmxCombineAddC;
+
+ composeFunctions.combineMaskU = mmxCombineMaskU;
+ }
+}
+
+
+/* ------------------ MMX code paths called from fbpict.c ----------------------- */
+
typedef struct
{
ullong mmx_4x00ff;
@@ -128,12 +698,6 @@
return res;
}
-#ifdef USE_SSE
-#define HAVE_PSHUFW
-#endif
-
-#ifdef HAVE_PSHUFW
-
static __inline__ __m64
expand_alpha (__m64 pixel)
{
@@ -152,61 +716,6 @@
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 0, 1, 2));
}
-#else
-
-static __inline__ __m64
-expand_alpha (__m64 pixel)
-{
- __m64 t1, t2;
-
- t1 = shift (pixel, -48);
- t2 = shift (t1, 16);
- t1 = _mm_or_si64 (t1, t2);
- t2 = shift (t1, 32);
- t1 = _mm_or_si64 (t1, t2);
-
- return t1;
-}
-
-static __inline__ __m64
-expand_alpha_rev (__m64 pixel)
-{
- __m64 t1, t2;
-
- /* move alpha to low 16 bits and zero the rest */
- t1 = shift (pixel, 48);
- t1 = shift (t1, -48);
-
- t2 = shift (t1, 16);
- t1 = _mm_or_si64 (t1, t2);
- t2 = shift (t1, 32);
- t1 = _mm_or_si64 (t1, t2);
-
- return t1;
-}
-
-static __inline__ __m64
-invert_colors (__m64 pixel)
-{
- __m64 x, y, z;
-
- x = y = z = pixel;
-
- x = _mm_and_si64 (x, MC(ffff0000ffff0000));
- y = _mm_and_si64 (y, MC(000000000000ffff));
- z = _mm_and_si64 (z, MC(0000ffff00000000));
-
- y = shift (y, 32);
- z = shift (z, -32);
-
- x = _mm_or_si64 (x, y);
- x = _mm_or_si64 (x, z);
-
- return x;
-}
-
-#endif
-
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
@@ -1890,7 +2399,89 @@
width, height);
}
-#ifndef __amd64__
+#if !defined(__amd64__) && !defined(__x86_64__)
+
+enum CPUFeatures {
+ NoFeatures = 0,
+ MMX = 0x1,
+ MMX_Extensions = 0x2,
+ SSE = 0x6,
+ SSE2 = 0x8,
+ CMOV = 0x10
+};
+
+static uint detectCPUFeatures(void) {
+ uint result;
+ char vendor[13];
+ vendor[0] = 0;
+ vendor[12] = 0;
+ /* see p. 118 of amd64 instruction set manual Vol3 */
+ asm ("push %%ebx\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "mov %%eax, %%ebx\n"
+ "xor $0x00200000, %%eax\n"
+ "push %%eax\n"
+ "popf\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "mov $0x0, %%edx\n"
+ "xor %%ebx, %%eax\n"
+ "jz skip\n"
+
+ "mov $0x00000000, %%eax\n"
+ "cpuid\n"
+ "mov %%ebx, %1\n"
+ "mov %%edx, %2\n"
+ "mov %%ecx, %3\n"
+ "mov $0x00000001, %%eax\n"
+ "cpuid\n"
+ "skip:\n"
+ "pop %%ebx\n"
+ "mov %%edx, %0\n"
+ : "=r" (result),
+ "=m" (vendor[0]),
+ "=m" (vendor[4]),
+ "=m" (vendor[8])
+ :
+ : "%eax", "%ebx", "%ecx", "%edx"
+ );
+
+ uint features = 0;
+ if (result) {
+ // result now contains the standard feature bits
+ if (result & (1 << 15))
+ features |= CMOV;
+ if (result & (1 << 23))
+ features |= MMX;
+ if (result & (1 << 25))
+ CAPITAL LETTER PSI
0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e2: 0x03b2, # GREEK SMALL LETTER BETA
0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA
0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA
0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA
0x00e7: 0x03b7, # GREEK SMALL LETTER ETA
0x00e8: 0x03b8, # GREEK SMALL LETTER THETA
0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA
0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA
0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA
0x00ec: 0x03bc, # GREEK SMALL LETTER MU
0x00ed: 0x03bd, # GREEK SMALL LETTER NU
0x00ee: 0x03be, # GREEK SMALL LETTER XI
0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00f0: 0x03c0, # GREEK SMALL LETTER PI
0x00f1: 0x03c1, # GREEK SMALL LETTER RHO
0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00f4: 0x03c4, # GREEK SMALL LETTER TAU
0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00f6: 0x03c6, # GREEK SMALL LETTER PHI
0x00f7: 0x03c7, # GREEK SMALL LETTER CHI
0x00f8: 0x03c8, # GREEK SMALL LETTER PSI
0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00ff: None, # UNDEFINED
})
### Encoding Map
encoding_map = codecs.make_encoding_map(decoding_map)
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec driven by the module-level encoding/decoding maps."""

    def encode(self, input, errors='strict'):
        """Encode ``input`` through ``encoding_map``.

        Returns whatever ``codecs.charmap_encode`` returns for the given
        input and ``errors`` policy.
        """
        return codecs.charmap_encode(input, errors, encoding_map)

    def decode(self, input, errors='strict'):
        """Decode ``input`` through ``decoding_map``.

        Returns whatever ``codecs.charmap_decode`` returns for the given
        input and ``errors`` policy.
        """
        return codecs.charmap_decode(input, errors, decoding_map)
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining this module's Codec with codecs.StreamWriter.

    Adds no members of its own.
    """
    pass
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining this module's Codec with codecs.StreamReader.

    Adds no members of its own.
    """
    pass
### encodings module API
def getregentry():
    """Return this codec's registry entry.

    The result is a 4-tuple: the bound ``encode`` and ``decode`` methods of
    two freshly created ``Codec`` instances, followed by the ``StreamReader``
    and ``StreamWriter`` classes.
    """
    return (Codec().encode, Codec().decode, StreamReader, StreamWriter)
### Decoding Map
# Start from the identity mapping for all 256 byte values, then override the
# entries listed below. A value of None marks the byte as UNDEFINED.
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x0089: 0x2030, # PER MILLE SIGN
0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
0x008d: None, # UNDEFINED
0x008e: None, # UNDEFINED
0x008f: None, # UNDEFINED
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: 0x02dc, # SMALL TILDE
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
0x009d: None, # UNDEFINED
0x009e: None, # UNDEFINED
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
})
### Encoding Map
# Derived from decoding_map via codecs.make_encoding_map().
encoding_map = codecs.make_encoding_map(decoding_map)
CARD32 *mask, int width);
+
+typedef struct _FbComposeFunctions {
+ CombineFuncU *combineU;
+ CombineFuncC *combineC;
+ CombineMaskU combineMaskU;
+} FbComposeFunctions;
/* fbaddtrap.c */
More information about the xserver-commit
mailing list