a just another stupid newbie :)
Jaymz Julian
jaymz@artificial-stupidity.net
Sat, 13 Dec 2003 04:57:37 +1100
--CNfT9TXqV7nd4cfk
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
So, I wanted transparency to not be as slow as molasses on my system
(p3m-500), and so i figured i'd try and do something about it rather than
You should not apply the attached patch to your tree. I'm still getting
my head around all of this code, I probably fucked it up :).
This patch improves performance of xrender composites significantly on 24bit
displays (in my case, an ati mach64 @1024x768x32). It could probably be
improved further, specifically by only doing word writes; although this
didn't help on my laptop, I suspect it would help on powerpc, and I
know it would on arm.
I haven't tested it on non-intel systems yet, but if anyone cares, I'll fix
the inevitable bugs on powerpc when i fix my mac in a few days.
The performance is still pretty terrible on this system (my unscientific
test program, mplayer, gets around 3-4fps with this patch, and about 0.5
fps without it), but I suspect that I could make it significantly faster
if I were to throw sse/altivec at it.
Is this sort of work on making software eyecandy run at a reasonable speed
wanted/needed? Or am I on totally the wrong track here? (This stuff is useful
for my own use anyhow, so feel free to tell me to go to hell :) )
-- jj
p.s. is it a peculiarity of the mach64 driver, or are 32bit visuals kinda
broken at the moment?
--
Jaymz Julian aka A Life in Hell / Warriors of the Wasteland / Unreal
Coder, Visionary, Fat Ass.
"Hannibal is a serial killer. He only likes to kill and eat people.
Very few people have `I want to be killed and eaten' on their cards,
so Hannibal is out of a job." - http://cards.sf.net
--CNfT9TXqV7nd4cfk
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="jj_24bit_combine.patch"
Index: fb/fbcompose.c
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbcompose.c,v
retrieving revision 1.17
diff -u -3 -p -r1.17 fbcompose.c
--- fb/fbcompose.c 11 Sep 2003 05:12:50 -0000 1.17
+++ fb/fbcompose.c 12 Dec 2003 17:41:35 -0000
@@ -1649,15 +1649,21 @@ fbFetch_r8g8b8 (FbCompositeOperand *op)
FbBits *line = op->u.drawable.line; CARD32 offset = op->u.drawable.offset;
CARD8 *pixel = ((CARD8 *) line) + (offset >> 3);
#if IMAGE_BYTE_ORDER == MSBFirst
return (0xff000000 |
(pixel[0] << 16) |
(pixel[1] << 8) |
(pixel[2]));
#else
- return (0xff000000 |
- (pixel[2] << 16) |
- (pixel[1] << 8) |
- (pixel[0]));
+ #ifdef WORKING_UNALIGNED_INT
+ return *(CARD32 *)pixel|0xff000000;
+ #else
+ return (0xff000000 |
+ (pixel[2] << 16) |
+ (pixel[1] << 8) |
+ (pixel[0]));
+ #endif
#endif
}
Index: fb/fbpict.c
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbpict.c,v
retrieving revision 1.18
diff -u -3 -p -r1.18 fbpict.c
--- fb/fbpict.c 5 Nov 2003 05:45:31 -0000 1.18
+++ fb/fbpict.c 12 Dec 2003 17:41:38 -0000
@@ -959,6 +959,117 @@ fbCompositeTrans_0565xnx0565(CARD8
}
}
+void
+fbCompositeTrans_0888xnx0888(CARD8 op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{ /* Blends a packed 3-bytes-per-pixel source into the destination, weighted by the mask alpha; op, xMask and yMask are not consulted. */
+ CARD8 *dstLine, *dst,*idst; /* dst: write cursor for the row; idst: start address handed to the destination reader */
+ CARD8 *srcLine, *src;
+ FbStride dstStride, srcStride;
+ CARD16 w; /* pixels left in the current row */
+ FbBits mask;
+ CARD16 maskAlpha,maskiAlpha; /* source weight and its complement (255 - maskAlpha) */
+ /* The blend weight is the top byte (bits 24..31) of the value fbComposeGetSolid yields for pMask. */
+ fbComposeGetSolid (pMask, mask);
+ maskAlpha = mask >> 24;
+ maskiAlpha= 255-maskAlpha;
+ /* A weight of zero contributes no source, so return without touching the destination. */
+ if (!maskAlpha)
+ return;
+ //if (maskAlpha == 0xff)
+ //{
+ //fbCompositeSrc_0888x0888 (op, pSrc, pMask, pDst,
+ // xSrc, ySrc, xMask, yMask, xDst, yDst,
+ // width, height);
+ //return;
+ //}
+ /* Fetch row start and stride for both pictures; the trailing 3 is presumably the bytes-per-pixel multiplier -- TODO confirm against fbComposeGetStart. */
+ fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 3);
+ fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3);
+
+ {
+ unsigned int ws,wt,wd,ww; /* ws/wd: bytes left in the cached source/dest word; wt: scratch for the setup macro; ww is unused */
+ CARD32 workingSource; /* cached source word, consumed one byte at a time */
+ CARD32 *wsrc; /* next aligned source word to load */
+ CARD32 rs,gs,bs; /* the three source bytes of the current pixel, in memory order */
+ CARD32 rd,gd,bd; /* the three destination bytes of the current pixel, in memory order */
+
+ CARD32 workingiDest,workingoDest; /* workingiDest: cached destination word; workingoDest is never used */
+ CARD32 *widst,wodst; /* widst: next aligned destination word; wodst is a plain CARD32 and is never used */
+
+ while (height--)
+ {
+ idst=dst = dstLine;
+ dstLine += dstStride;
+ src = srcLine;
+ srcLine += srcStride;
+ w = width;
+
+#if IMAGE_BYTE_ORDER == LSBFirst
+ /* setupPackedReader rounds 'where' down to a 4-byte boundary, loads that word, shifts out the bytes before the original address and leaves 'count' as the bytes still unread. */
+#define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \
+ temp=count&3; \
+ where-=temp; \
+ workingWhere=(CARD32 *)where; \
+ workingVal=*workingWhere++; \
+ count=4-temp; \
+ workingVal>>=(8*temp)
+#define readPacked(where,x,y,z) if(!(x)) { (x)=4; y=*z++; } where=(y)&0xff; (y)>>=8; (x)--;
+#define readPackedSource(where) readPacked(where,ws,workingSource,wsrc)
+#define readPackedDest(where) readPacked(where,wd,workingiDest,widst)
+ /* The readers above hand out bytes from the low end of the cached word and reload it once four bytes have been consumed. */
+#else
+ /* Variant for the other byte order: same bookkeeping, but bytes are taken from the top of the cached word (>>24) and the word is shifted left. */
+#warning "I havn't tested fbCompositeTrans_0888xnx0888() on big endian yet!"
+
+#define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \
+ temp=count&3; \
+ where-=temp; \
+ workingWhere=(CARD32 *)where; \
+ workingVal=*workingWhere++; \
+ count=4-temp; \
+ workingVal<<=(8*temp)
+#define readPacked(where,x,y,z) if(!(x)) { (x)=4; y=*z++; } where=(y)>>24; (y)<<=8; (x)--;
+#define readPackedSource(where) readPacked(where,ws,workingSource,wsrc)
+#define readPackedDest(where) readPacked(where,wd,workingiDest,widst)
+ /* NOTE(review): both variants cast the pointer to int; only its low two bits are used before 'count' is overwritten. */
+#endif
+ setupPackedReader(ws,wt,src,wsrc,workingSource);
+ setupPackedReader(wd,wt,idst,widst,workingiDest);
+ /* Each iteration blends one pixel: three source bytes against three destination bytes. */
+ while (w--)
+ {
+ readPackedSource(rs);
+ readPackedSource(gs);
+ readPackedSource(bs);
+
+ readPackedDest(rd);
+ readPackedDest(gd);
+ readPackedDest(bd);
+ /* Weighted sum of source and destination; the >>8 divides by 256 although the two weights sum to 255. */
+ rd=(rs*maskAlpha+rd*maskiAlpha)>>8;
+ gd=(gs*maskAlpha+gd*maskiAlpha)>>8;
+ bd=(bs*maskAlpha+bd*maskiAlpha)>>8;
+ /* Store the blended bytes back in the same order they were read, then step to the next pixel. */
+ dst[0]=rd;
+ dst[1]=gd;
+ dst[2]=bd;
+ dst+=3;
+ }
+ }
+ }
+}
+
/*
* Simple bitblt
*/
@@ -1168,6 +1279,11 @@ fbComposite (CARD8 op,
if (pDst->format == pSrc->format)
func = fbCompositeTrans_0565xnx0565;
break;
+ case PICT_r8g8b8:
+ case PICT_b8g8r8:
+ if (pDst->format == pSrc->format)
+ func = fbCompositeTrans_0888xnx0888;
+ break;
}
if (func != fbCompositeGeneral)
maskRepeat = FALSE;
@@ -1281,7 +1397,7 @@ fbComposite (CARD8 op,
x_src = pbox->x1 - xDst + xSrc;
x_msk = pbox->x1 - xDst + xMask;
x_dst = pbox->x1;
- if (maskRepeat)
+ if (maskRepeat && pMask->pDrawable->height > 1)
{
y_msk = mod (y_msk, pMask->pDrawable->height);
if (h_this > pMask->pDrawable->height - y_msk)
@@ -1296,7 +1412,7 @@ fbComposite (CARD8 op,
while (w)
{
w_this = w;
- if (maskRepeat)
+ if (maskRepeat && pMask->pDrawable->width > 1)
{
x_msk = mod (x_msk, pMask->pDrawable->width);
if (w_this > pMask->pDrawable->width - x_msk)
Index: fb/fbpict.h
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbpict.h,v
retrieving revision 1.13
diff -u -3 -p -r1.13 fbpict.h
--- fb/fbpict.h 5 Nov 2003 05:45:31 -0000 1.13
+++ fb/fbpict.h 12 Dec 2003 17:41:42 -0000
@@ -969,6 +969,20 @@ fbCompositeTrans_0565xnx0565(CARD8
CARD16 width,
CARD16 height);
+void
+fbCompositeTrans_0888xnx0888(CARD8 op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
void
fbCompositeSrcSrc_nxn (CARD8 op,
PicturePtr pSrc,
Index: hw/kdrive/src/kaa.c
===================================================================
RCS file: /cvs/xserver/xserver/hw/kdrive/src/kaa.c,v
retrieving revision 1.21
diff -u -3 -p -r1.21 kaa.c
--- hw/kdrive/src/kaa.c 20 Nov 2003 07:49:46 -0000 1.21
+++ hw/kdrive/src/kaa.c 12 Dec 2003 17:41:58 -0000
@@ -693,7 +693,9 @@ kaaImageGlyphBlt (DrawablePtr pDrawable,
switch (dstBpp) {
case 8: glyph = fbGlyph8; break;
case 16: glyph = fbGlyph16; break;
+#ifndef FBNO24BIT
case 24: glyph = fbGlyph24; break;
+#endif
case 32: glyph = fbGlyph32; break;
}
Index: include/servermd.h
===================================================================
RCS file: /cvs/xserver/xserver/include/servermd.h,v
retrieving revision 3.55
diff -u -3 -p -r3.55 servermd.h
--- include/servermd.h 11 Sep 2003 05:12:51 -0000 3.55
+++ include/servermd.h 12 Dec 2003 17:42:03 -0000
@@ -122,6 +122,11 @@ SOFTWARE.
* Currently defined for SPARC.
*/
+
+/* WORKING_UNALIGNED_INT: when defined, the non-MSBFirst path of fbFetch_r8g8b8 fetches a pixel with a single CARD32 load from a byte-granular address. */
+#define WORKING_UNALIGNED_INT
+/* NOTE(review): defined unconditionally here, so every platform including this header takes that path -- TODO confirm it is safe and guard it per architecture. */
+
#ifdef vax
#define IMAGE_BYTE_ORDER LSBFirst /* Values for the VAX only */
--CNfT9TXqV7nd4cfk--