a just another stupid newbie :)

Jaymz Julian jaymz@artificial-stupidity.net
Sat, 13 Dec 2003 04:57:37 +1100


--CNfT9TXqV7nd4cfk
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

So, I wanted transparency to not be as slow as molasses on my system
(p3m-500), and so i figured i'd try and do something about it rather than

You should not apply the attached patch to your tree.  I'm still getting
my head around all of this code, I probably fucked it up :).

This patch improves performance of xrender composites significantly on 24bit
displays (in my case, an ati mach64 @1024x768x32).  It could probably be
improved further, specifically by only doing word writes.  Although this
didn't help on my laptop, I suspect it would help on powerpc, and I
know it would on arm.

I haven't tested it on non-intel systems yet, but if anyone cares, I'll fix
the inevitable bugs on powerpc when I fix my mac in a few days.

The performance is still pretty terrible on this system (my unscientific
test program, mplayer, gets around 3-4fps with this patch, and about 0.5
fps without it), but I suspect that I could make it significantly faster
if I were to throw sse/altivec at it.

Is this sort of work on making software eyecandy run at a reasonable speed
wanted/needed?  Or am I on totally the wrong track here?  (This stuff is useful
for my own use anyhow, so feel free to tell me to go to hell :)

	-- jj

P.S. Is it a peculiarity of the mach64 driver, or are 32bit visuals kinda
broken at the moment?

-- 
Jaymz Julian aka A Life in Hell / Warriors of the Wasteland / Unreal
Coder, Visionary, Fat Ass.
"Hannibal is a serial killer. He only likes to kill and eat people. 
 Very few people have `I want to be killed and eaten' on their cards, 
 so Hannibal is out of a job." - http://cards.sf.net

--CNfT9TXqV7nd4cfk
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="jj_24bit_combine.patch"

Index: fb/fbcompose.c
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbcompose.c,v
retrieving revision 1.17
diff -u -3 -p -r1.17 fbcompose.c
--- fb/fbcompose.c	11 Sep 2003 05:12:50 -0000	1.17
+++ fb/fbcompose.c	12 Dec 2003 17:41:35 -0000
@@ -1649,15 +1649,21 @@ fbFetch_r8g8b8 (FbCompositeOperand *op)
     FbBits  *line = op->u.drawable.line; CARD32 offset = op->u.drawable.offset;
     CARD8   *pixel = ((CARD8 *) line) + (offset >> 3);
 #if IMAGE_BYTE_ORDER == MSBFirst
     return (0xff000000 |
 	    (pixel[0] << 16) |
 	    (pixel[1] << 8) |
 	    (pixel[2]));
 #else
-    return (0xff000000 |
-	    (pixel[2] << 16) |
-	    (pixel[1] << 8) |
-	    (pixel[0]));
+	#ifdef WORKING_UNALIGNED_INT
+		return *(CARD32 *)pixel|0xff000000;
+	#else
+	    return (0xff000000 |
+		    (pixel[2] << 16) |
+		    (pixel[1] << 8) |
+		    (pixel[0]));
+	#endif
 #endif
 }
 
Index: fb/fbpict.c
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbpict.c,v
retrieving revision 1.18
diff -u -3 -p -r1.18 fbpict.c
--- fb/fbpict.c	5 Nov 2003 05:45:31 -0000	1.18
+++ fb/fbpict.c	12 Dec 2003 17:41:38 -0000
@@ -959,6 +959,117 @@ fbCompositeTrans_0565xnx0565(CARD8      
     }
 }
 
+void
+fbCompositeTrans_0888xnx0888(CARD8      op,
+			     PicturePtr pSrc,
+			     PicturePtr pMask,
+			     PicturePtr pDst,
+			     INT16      xSrc,
+			     INT16      ySrc,
+			     INT16      xMask,
+			     INT16      yMask,
+			     INT16      xDst,
+			     INT16      yDst,
+			     CARD16     width,
+			     CARD16     height)
+{
+    CARD8	*dstLine, *dst,*idst;
+    CARD8	*srcLine, *src;
+    FbStride	dstStride, srcStride;
+    CARD16	w;
+    FbBits	mask;
+    CARD16	maskAlpha,maskiAlpha;
+    
+    fbComposeGetSolid (pMask, mask);
+    maskAlpha = mask >> 24;
+	maskiAlpha= 255-maskAlpha;
+    
+    if (!maskAlpha)
+	return;
+    //if (maskAlpha == 0xff)
+    //{
+	//fbCompositeSrc_0888x0888 (op, pSrc, pMask, pDst,
+	//			  xSrc, ySrc, xMask, yMask, xDst, yDst, 
+	//			  width, height);
+	//return;
+    //}
+	
+    fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 3);
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3);
+
+	{
+			unsigned int ws,wt,wd,ww;
+			CARD32 workingSource;
+			CARD32 *wsrc;
+			CARD32 rs,gs,bs;
+			CARD32 rd,gd,bd;
+			
+			CARD32 workingiDest,workingoDest;
+			CARD32 *widst,wodst;
+
+    while (height--)
+    {
+		idst=dst = dstLine;
+		dstLine += dstStride;
+		src = srcLine;
+		srcLine += srcStride;
+		w = width;
+
+#if IMAGE_BYTE_ORDER == LSBFirst
+
+#define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \
+					temp=count&3; \
+					where-=temp; \
+					workingWhere=(CARD32 *)where; \
+					workingVal=*workingWhere++; \
+					count=4-temp; \
+					workingVal>>=(8*temp)
+#define readPacked(where,x,y,z) if(!(x)) { (x)=4; y=*z++; } where=(y)&0xff; (y)>>=8; (x)--;
+#define readPackedSource(where) readPacked(where,ws,workingSource,wsrc)
+#define readPackedDest(where) readPacked(where,wd,workingiDest,widst)
+		
+#else
+
+#warning "I havn't tested fbCompositeTrans_0888xnx0888() on big endian yet!"
+
+#define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \
+					temp=count&3; \
+					where-=temp; \
+					workingWhere=(CARD32 *)where; \
+					workingVal=*workingWhere++; \
+					count=4-temp; \
+					workingVal<<=(8*temp)
+#define readPacked(where,x,y,z) if(!(x)) { (x)=4; y=*z++; } where=(y)>>24; (y)<<=8; (x)--;
+#define readPackedSource(where) readPacked(where,ws,workingSource,wsrc)
+#define readPackedDest(where) readPacked(where,wd,workingiDest,widst)
+		
+#endif
+		setupPackedReader(ws,wt,src,wsrc,workingSource);
+		setupPackedReader(wd,wt,idst,widst,workingiDest);
+
+		while (w--)
+		{
+			readPackedSource(rs);
+			readPackedSource(gs);
+			readPackedSource(bs);
+
+			readPackedDest(rd);
+			readPackedDest(gd);
+			readPackedDest(bd);
+
+			rd=(rs*maskAlpha+rd*maskiAlpha)>>8;
+			gd=(gs*maskAlpha+gd*maskiAlpha)>>8;
+			bd=(bs*maskAlpha+bd*maskiAlpha)>>8;
+
+			dst[0]=rd;
+			dst[1]=gd;
+			dst[2]=bd;
+			dst+=3;
+		}
+    }
+	}
+}
+
 /*
  * Simple bitblt
  */
@@ -1168,6 +1279,11 @@ fbComposite (CARD8      op,
 		    if (pDst->format == pSrc->format)
 		        func = fbCompositeTrans_0565xnx0565;
 		    break;
+		case PICT_r8g8b8:
+		case PICT_b8g8r8:
+		    if (pDst->format == pSrc->format)
+		        func = fbCompositeTrans_0888xnx0888;
+		    break;
 		}
 		if (func != fbCompositeGeneral)
 		    maskRepeat = FALSE;
@@ -1281,7 +1397,7 @@ fbComposite (CARD8      op,
 	    x_src = pbox->x1 - xDst + xSrc;
 	    x_msk = pbox->x1 - xDst + xMask;
 	    x_dst = pbox->x1;
-	    if (maskRepeat)
+	    if (maskRepeat && pMask->pDrawable->height > 1)
 	    {
 		y_msk = mod (y_msk, pMask->pDrawable->height);
 		if (h_this > pMask->pDrawable->height - y_msk)
@@ -1296,7 +1412,7 @@ fbComposite (CARD8      op,
 	    while (w)
 	    {
 		w_this = w;
-		if (maskRepeat)
+		if (maskRepeat && pMask->pDrawable->width > 1)
 		{
 		    x_msk = mod (x_msk, pMask->pDrawable->width);
 		    if (w_this > pMask->pDrawable->width - x_msk)
Index: fb/fbpict.h
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbpict.h,v
retrieving revision 1.13
diff -u -3 -p -r1.13 fbpict.h
--- fb/fbpict.h	5 Nov 2003 05:45:31 -0000	1.13
+++ fb/fbpict.h	12 Dec 2003 17:41:42 -0000
@@ -969,6 +969,20 @@ fbCompositeTrans_0565xnx0565(CARD8      
 			     CARD16     width,
 			     CARD16     height);
 
+void 
+fbCompositeTrans_0888xnx0888(CARD8      op,
+			     PicturePtr pSrc,
+			     PicturePtr pMask,
+			     PicturePtr pDst,
+			     INT16      xSrc,
+			     INT16      ySrc,
+			     INT16      xMask,
+			     INT16      yMask,
+			     INT16      xDst,
+			     INT16      yDst,
+			     CARD16     width,
+			     CARD16     height);
+
 void
 fbCompositeSrcSrc_nxn  (CARD8	op,
 			PicturePtr pSrc,
Index: hw/kdrive/src/kaa.c
===================================================================
RCS file: /cvs/xserver/xserver/hw/kdrive/src/kaa.c,v
retrieving revision 1.21
diff -u -3 -p -r1.21 kaa.c
--- hw/kdrive/src/kaa.c	20 Nov 2003 07:49:46 -0000	1.21
+++ hw/kdrive/src/kaa.c	12 Dec 2003 17:41:58 -0000
@@ -693,7 +693,9 @@ kaaImageGlyphBlt (DrawablePtr	pDrawable,
     switch (dstBpp) {
     case 8:	glyph = fbGlyph8; break;
     case 16:    glyph = fbGlyph16; break;
+#ifndef FBNO24BIT
     case 24:    glyph = fbGlyph24; break;
+#endif
     case 32:    glyph = fbGlyph32; break;
     }
     
Index: include/servermd.h
===================================================================
RCS file: /cvs/xserver/xserver/include/servermd.h,v
retrieving revision 3.55
diff -u -3 -p -r3.55 servermd.h
--- include/servermd.h	11 Sep 2003 05:12:51 -0000	3.55
+++ include/servermd.h	12 Dec 2003 17:42:03 -0000
@@ -122,6 +122,11 @@ SOFTWARE.
  *	Currently defined for SPARC.
  */
 
+
+// FIXME: document this /jj
+#define WORKING_UNALIGNED_INT
+
+
 #ifdef vax
 
 #define IMAGE_BYTE_ORDER	LSBFirst        /* Values for the VAX only */

--CNfT9TXqV7nd4cfk--