[xserver-commit] xserver/fb fbpict.c,1.20,1.21

Jaymz Julian xserver-commit@pdx.freedesktop.org
Fri, 19 Dec 2003 02:28:12 -0800


Committed by: jaymz

Update of /cvs/xserver/xserver/fb
In directory pdx:/tmp/cvs-serv9131/fb

Modified Files:
	fbpict.c 
Log Message:

Optimize fbCompositeTrans_0888xnx0888()'s unaligned path by doing the 
unpacking on source rather than dest (which requires half the number of
variables to do so), which also has the effect of making the code look
slightly less on crack (it isn't, but it looks less on crack :-p)



Index: fbpict.c
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbpict.c,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -d -r1.20 -r1.21
--- fbpict.c	18 Dec 2003 17:12:38 -0000	1.20
+++ fbpict.c	19 Dec 2003 10:28:09 -0000	1.21
@@ -163,16 +163,12 @@
 
 #define genericCombine24(a,b,c,d) (((a)*(c)+(b)*(d)))
 
-#define fastcombine32(alpha, source, destval, destptr, dstrb, dstag, drb, dag) \
-	dstrb=destval&0xFF00FF; dstag=(destval>>8)&0xFF00FF; \
-	drb=((source&0xFF00FF)-dstrb)*alpha; dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \
+#define fastcombine32(alpha, source, destval, destptr) { \
+	CARD32 dstrb=destval&0xFF00FF; CARD32 dstag=(destval>>8)&0xFF00FF; \
+	CARD32 drb=((source&0xFF00FF)-dstrb)*alpha; CARD32 dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \
 	*destptr++=((((drb>>8) + dstrb) & 0x00FF00FF) | ((((dag>>8) + dstag) << 8) & 0xFF00FF00)); \
+	}
 
-#define fastcombine32(alpha, source, destval, destptr, dstrb, dstag, drb, dag) \
-	dstrb=destval&0xFF00FF; dstag=(destval>>8)&0xFF00FF; \
-	drb=((source&0xFF00FF)-dstrb)*alpha; dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \
-	*destptr++=((((drb>>8) + dstrb) & 0x00FF00FF) | ((((dag>>8) + dstag) << 8) & 0xFF00FF00)); \
-	
 // Note: this macro expects 6 bits of alpha, not 8!
 #define fastCombine0565(alpha, source, destval, destptr) { \
 	CARD16 dstrb = destval & 0xf81f; CARD16 dstg  = destval & 0x7e0; \
@@ -241,7 +237,7 @@
     
     fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
     fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
-    
+	
     while (height--)
     {
 	dst = dstLine;
@@ -934,7 +930,7 @@
     int		dstXoff, dstYoff;
     int		maskXoff, maskYoff;
     FbBits	src;
-    
+
     fbComposeGetSolid(pSrc, src);
 
     if ((src & 0xff000000) != 0xff000000)
@@ -1109,15 +1105,11 @@
     fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3);
 
 	{
-		unsigned int ws,wt,wd,ww;
+		unsigned int ws,wt,ww;
 		CARD32 workingSource;
-		CARD32 *wsrc;
-		CARD32 rs,gs,bs;
-		CARD32 rd,gd,bd;
-
-		CARD32 workingiDest,workingoDest;
-		CARD32 *widst,*wodst;
-
+		CARD32 *wsrc, *wdst, *widst;
+		CARD32 rs, rd, nd;
+		CARD8 *isrc;
 
 		// are xSrc and xDst at the same alignment?  if not, we need to be complicated :)
 		//if(0==0)
@@ -1125,112 +1117,77 @@
 		{
 			while (height--)
 			{
-				idst=dst = dstLine;
+				dst = dstLine;
 				dstLine += dstStride;
-				src = srcLine;
+				isrc = src = srcLine;
 				srcLine += srcStride;
 				w = width*3;
 				
-				setupPackedReader(wd,wt,idst,widst,workingiDest);
-				ww=(int)dst;
-				wt=ww&3;
-				dst-=wt; 
-				wodst=(CARD32 *)dst; 
-				workingoDest=*wodst; 
-				ww=4-wt;
-#if IMAGE_BYTE_ORDER == LSBFirst
-				workingoDest<<=(8*(ww+1));
-#else
-				workingoDest>>=(8*(ww+1));
-#endif
+				setupPackedReader(ws,wt,isrc,wsrc,workingSource);
 
 				// get to word aligned
-				switch(!(int)src&3)
+				switch(!(int)dst&3)
 				{
 					case 1:
-						readPackedDest(rd);
-						rd=alphamaskCombine24(*src++, rd)>>8;
-						writePacked(rd);
+						readPackedSource(rs);
+						//*dst++=alphamaskCombine24(rs, *dst)>>8;
+						rd=*dst;  // make gcc happy.  hope it doens't cost us too much performance
+						*dst++=alphamaskCombine24(rs, rd)>>8;
 						w--; if(w==0) break;
 					case 2:
-						readPackedDest(rd);
-						rd=alphamaskCombine24(*src++, rd)>>8;
-						writePacked(rd);
+						readPackedSource(rs);
+						rd=*dst;  
+						*dst++=alphamaskCombine24(rs, rd)>>8;
 						w--; if(w==0) break;
 					case 3:
-						readPackedDest(rd);
-						rd=alphamaskCombine24(*src++, rd)>>8;
-						writePacked(rd);
+						readPackedSource(rs);
+						rd=*dst;  
+						*dst++=alphamaskCombine24(rs, rd)>>8;
 						w--; if(w==0) break;
 				}
-				wsrc=(CARD32 *)src;
+				wdst=(CARD32 *)dst;
 				while (w>3)
 				{
-					rs=*wsrc++;
-					// FIXME: write a version of readPackedDest() which
-					// can collect 4 bytes at once if we're on a boundry (which we're
-					// actually guarenteed not to be in this version, but do it anyhow), and can
-					// collect as 2 16bit words on a 2byte boundry, and then use the 32bit combine here
-#if IMAGE_BYTE_ORDER == LSBFirst
-					readPackedDest(rd);
-					rd=alphamaskCombine24(rs&0xff, rd)>>8;
-					writePacked(rd);
+					// FIXME: write a special readPackedWord
+					// macro, which knows how to halfword combine
 
-					readPackedDest(rd);
-					rd=alphamaskCombine24((rs>>8)&0xff, rd)>>8;
-					writePacked(rd);
-					
-					readPackedDest(rd);
-					rd=alphamaskCombine24((rs>>16)&0xff, rd)>>8;
-					writePacked(rd);
-					
-					readPackedDest(rd);
-					rd=alphamaskCombine24(rs>>24, rd)>>8;
-					writePacked(rd);
+#if IMAGE_BYTE_ORDER == LSBFirst
+					rd=*wdst;
+					readPackedSource(nd);
+					readPackedSource(rs);
+					nd|=rs<<8;
+					readPackedSource(rs);
+					nd|=rs<<16;
+					readPackedSource(rs);
+					nd|=rs<<24;
 #else
-					readPackedDest(rd);
-					rd=alphamaskCombine24(rs>>24, rd)>>8;
-					writePacked(rd);
-					
-					readPackedDest(rd);
-					rd=alphamaskCombine24((rs>>16)&0xff, rd)>>8;
-					writePacked(rd);
-					
-					readPackedDest(rd);
-					rd=alphamaskCombine24((rs>>8)&0xff, rd)>>8;
-					writePacked(rd);
-
-					readPackedDest(rd);
-					rd=alphamaskCombine24(rs&0xff, rd)>>8;
-					writePacked(rd);
+					readPackedSource(nd);
+					nd<<=24;
+					readPackedSource(rs);
+					nd|=rs<<16;
+					readPackedSource(rs);
+					nd|=rs<<8;
+					readPackedSource(rs);
+					nd|=rs;
 #endif
+					fastcombine32(maskAlpha, nd, rd, wdst)
 					w-=4;
 				}
-				src=(CARD8 *)wsrc;
+				dst=(CARD8 *)wdst;
 				switch(w)
 				{
 					case 3:
-						readPackedDest(rd);
-						rd=alphamaskCombine24(*src++, rd)>>8;
-						writePacked(rd);
+						readPackedSource(rs);
+						rd=*dst;  
+						*dst++=alphamaskCombine24(rs, rd)>>8;
 					case 2:
-						readPackedDest(rd);
-						rd=alphamaskCombine24(*src++, rd)>>8;
-						writePacked(rd);
-					case 1:
-						readPackedDest(rd);
-						rd=alphamaskCombine24(*src++, rd)>>8;
-						writePacked(rd);
-				}
-				dst=(CARD8 *)wodst;
-				switch(ww)
-				{
+						readPackedSource(rs);
+						rd=*dst;  
+						*dst++=alphamaskCombine24(rs, rd)>>8;
 					case 1:
-						dst[2]=(workingoDest>>8)&0xff;
-					case 2:
-						dst[1]=(workingoDest>>16)&0xff;
-					case 3:
-						dst[0]=workingoDest>>24;
+						readPackedSource(rs);
+						rd=*dst;  
+						*dst++=alphamaskCombine24(rs, rd)>>8;
 				}
 			}
 		}
@@ -1262,12 +1219,11 @@
 				wsrc=(CARD32 *)src;
 				widst=(CARD32 *)dst;
 
-				register CARD32 t1, t2, t3, t4;
 				while(w>3)
 				{
 					rs = *wsrc++;
 					rd = *widst;
-					fastcombine32(maskAlpha, rs, rd, widst, t1, t2, t3, t4);
+					fastcombine32(maskAlpha, rs, rd, widst);
 					w-=4;
 				}
 				src=(CARD8 *)wsrc;