[xserver-commit] xserver/fb fbpict.h,1.13,1.14 fbpict.c,1.18,1.19
Jaymz Julian
xserver-commit@pdx.freedesktop.org
Thu, 18 Dec 2003 05:24:37 -0800
Committed by: jaymz
Update of /cvs/xserver/xserver/fb
In directory pdx:/tmp/cvs-serv30795/fb
Modified Files:
fbpict.h fbpict.c
Log Message:
Change some 16/24 bit composites to use 0xFF00FF/0xFF00FF00 composites
with two multiplies instead of 3/4 multiplies, and make the 24 bit
composites use word-aligned accesses whenever convenient. Semantics
are otherwise still the same as the old versions, including repeat
bogosity, since I need to do significantly more testing on that code
before forcing it on people :-p.
Index: fbpict.h
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbpict.h,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- fbpict.h 5 Nov 2003 05:45:31 -0000 1.13
+++ fbpict.h 18 Dec 2003 13:24:35 -0000 1.14
@@ -969,6 +969,48 @@
CARD16 width,
CARD16 height);
+void
+fbCompositeTrans_0888xnx0888(CARD8 op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+void
+fbCompositeTrans_0888xnx0888(CARD8 op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+void
+fbCompositeTrans_0888xnx0888(CARD8 op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
void
fbCompositeSrcSrc_nxn (CARD8 op,
PicturePtr pSrc,
Index: fbpict.c
===================================================================
RCS file: /cvs/xserver/xserver/fb/fbpict.c,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -d -r1.18 -r1.19
--- fbpict.c 5 Nov 2003 05:45:31 -0000 1.18
+++ fbpict.c 18 Dec 2003 13:24:35 -0000 1.19
@@ -161,6 +161,51 @@
(line) = ((type *) __bits__) + (stride) * ((y) + __yoff__) + (mul) * ((x) + __xoff__); \
}
+#define genericCombine24(a,b,c,d) (((a)*(c)+(b)*(d)))
+
+#define fastcombine32(alpha, source, destval, destptr, dstrb, dstag, drb, dag) \
+ dstrb=destval&0xFF00FF; dstag=(destval>>8)&0xFF00FF; \
+ drb=((source&0xFF00FF)-dstrb)*alpha; dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \
+ *destptr++=((((drb>>8) + dstrb) & 0x00FF00FF) | ((((dag>>8) + dstag) << 8) & 0xFF00FF00)); \
+
+#define fastcombine32(alpha, source, destval, destptr, dstrb, dstag, drb, dag) \
+ dstrb=destval&0xFF00FF; dstag=(destval>>8)&0xFF00FF; \
+ drb=((source&0xFF00FF)-dstrb)*alpha; dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \
+ *destptr++=((((drb>>8) + dstrb) & 0x00FF00FF) | ((((dag>>8) + dstag) << 8) & 0xFF00FF00)); \
+
+// Note: this macro expects 6 bits of alpha, not 8!
+#define fastCombine0565(alpha, source, destval, destptr) { \
+ CARD16 dstrb = destval & 0xf81f; CARD16 dstg = destval & 0x7e0; \
+ CARD32 drb = ((source&0xf81f)-dstrb)*alpha; CARD32 dg=((source & 0x7e0)-dstg)*alpha; \
+ destptr= ((((drb>>6) + dstrb)&0xf81f) | (((dg>>6) + dstg) & 0x7e0)); \
+ }
+
+#if IMAGE_BYTE_ORDER == LSBFirst
+ #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \
+ temp=count&3; \
+ where-=temp; \
+ workingWhere=(CARD32 *)where; \
+ workingVal=*workingWhere++; \
+ count=4-temp; \
+ workingVal>>=(8*temp)
+ #define readPacked(where,x,y,z) {if(!(x)) { (x)=4; y=*z++; } where=(y)&0xff; (y)>>=8; (x)--;}
+ #define readPackedSource(where) readPacked(where,ws,workingSource,wsrc)
+ #define readPackedDest(where) readPacked(where,wd,workingiDest,widst)
+ #define writePacked(what) workingoDest>>=8; workingoDest|=(what<<24); ww--; if(!ww) { ww=4; *wodst++=workingoDest; }
+#else
+ #warning "I havn't tested fbCompositeTrans_0888xnx0888() on big endian yet!"
+ #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \
+ temp=count&3; \
+ where-=temp; \
+ workingWhere=(CARD32 *)where; \
+ workingVal=*workingWhere++; \
+ count=4-temp; \
+ workingVal<<=(8*temp)
+ #define readPacked(where,x,y,z) {if(!(x)) { (x)=4; y=*z++; } where=(y)>>24; (y)<<=8; (x)--;}
+ #define readPackedSource(where) readPacked(where,ws,workingSource,wsrc)
+ #define readPackedDest(where) readPacked(where,wd,workingiDest,widst)
+ #define writePacked(what) workingoDest<<=8; workingoDest|=what; ww--; if(!ww) { ww=4; *wodst++=workingoDest; }
+#endif
/*
* Naming convention:
*
@@ -298,6 +343,7 @@
}
}
+#define srcAlphaCombine24(a,b) genericCombine24(a,b,srca,srcia)
void
fbCompositeSolidMask_nx8x0888 (CARD8 op,
PicturePtr pSrc,
@@ -312,52 +358,86 @@
CARD16 width,
CARD16 height)
{
- CARD32 src, srca;
- CARD8 *dstLine, *dst;
+ CARD32 src, srca, srcia;
+ CARD8 *dstLine, *dst, *edst;
CARD32 d;
CARD8 *maskLine, *mask, m;
FbStride dstStride, maskStride;
CARD16 w;
+ CARD32 rs,gs,bs,rd,gd,bd;
fbComposeGetSolid(pSrc, src);
srca = src >> 24;
+ srcia = 255-srca;
if (src == 0)
return;
+
+ rs=src&0xff;
+ gs=(src>>8)&0xff;
+ bs=(src>>16)&0xff;
fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3);
fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
-
- while (height--)
- {
- dst = dstLine;
- dstLine += dstStride;
- mask = maskLine;
- maskLine += maskStride;
- w = width;
- while (w--)
+ while (height--)
{
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- d = src;
- else
+ // fixme: cleanup unused
+ unsigned int wt,wd;
+ CARD32 workingiDest;
+ CARD32 *widst;
+
+ edst=dst = dstLine;
+ dstLine += dstStride;
+ mask = maskLine;
+ maskLine += maskStride;
+ w = width;
+
+#ifndef NO_MASKED_PACKED_READ
+ setupPackedReader(wd,wt,edst,widst,workingiDest);
+#endif
+
+ while (w--)
{
- d = Fetch24(dst);
- d = fbOver24 (src, d);
+#ifndef NO_MASKED_PACKED_READ
+ readPackedDest(rd);
+ readPackedDest(gd);
+ readPackedDest(bd);
+#else
+ rd= *edst++;
+ gd= *edst++;
+ bd= *edst++;
+#endif
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ {
+ *dst++=rs;
+ *dst++=gs;
+ *dst++=bs;
+ }
+ else
+ {
+ *dst++=(srcAlphaCombine24(rs, rd)>>8);
+ *dst++=(srcAlphaCombine24(gs, gd)>>8);
+ *dst++=(srcAlphaCombine24(bs, bd)>>8);
+ }
+ }
+ else if (m)
+ {
+ int na=(srca*(int)m)>>8;
+ int nia=255-na;
+ *dst++=(genericCombine24(rs, rd, na, nia)>>8);
+ *dst++=(genericCombine24(gs, gd, na, nia)>>8);
+ *dst++=(genericCombine24(bs, bd, na, nia)>>8);
+ }
+ else
+ {
+ dst+=3;
+ }
}
- Store24(dst,d);
- }
- else if (m)
- {
- d = fbOver24 (fbIn(src,m), Fetch24(dst));
- Store24(dst,d);
- }
- dst += 3;
}
- }
}
void
@@ -374,53 +454,57 @@
CARD16 width,
CARD16 height)
{
- CARD32 src, srca;
+ CARD32 src, srca,na, rsrca;
CARD16 *dstLine, *dst;
- CARD32 d;
+ CARD16 d;
CARD8 *maskLine, *mask, m;
FbStride dstStride, maskStride;
- CARD16 w;
+ CARD16 w,src16;
fbComposeGetSolid(pSrc, src);
+ src16 = cvt8888to0565(src);
- srca = src >> 24;
+ rsrca = src >> 24;
+ srca=rsrca>>2;
if (src == 0)
- return;
+ return;
fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
-
- while (height--)
- {
- dst = dstLine;
- dstLine += dstStride;
- mask = maskLine;
- maskLine += maskStride;
- w = width;
-
- while (w--)
+
+ while (height--)
{
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- d = src;
- else
+ dst = dstLine;
+ dstLine += dstStride;
+ mask = maskLine;
+ maskLine += maskStride;
+ w = width;
+
+ while (w--)
{
- d = *dst;
- d = fbOver24 (src, cvt0565to8888(d));
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ {
+ *dst=src16;
+ }
+ else
+ {
+ d = *dst;
+ fastCombine0565(srca, src16, d, *dst++);
+ }
+ }
+ else if (m)
+ {
+ na=(rsrca*(int)m)>>10;
+ d = *dst;
+ fastCombine0565(na, src16, d, *dst++);
+ }
+ else
+ dst++;
}
- *dst = cvt8888to0565(d);
- }
- else if (m)
- {
- d = *dst;
- d = fbOver24 (fbIn(src,m), cvt0565to8888(d));
- *dst = cvt8888to0565(d);
- }
- dst++;
}
- }
}
void
@@ -921,7 +1005,7 @@
CARD32 s_32, d_32, i_32, r_32;
fbComposeGetSolid (pMask, mask);
- maskAlpha = mask >> 24;
+ maskAlpha = mask >> 26;
if (!maskAlpha)
return;
@@ -937,26 +1021,272 @@
fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
while (height--)
- {
- dst = dstLine;
- dstLine += dstStride;
- src = srcLine;
- srcLine += srcStride;
- w = width;
+ {
+ CARD32 *isrc;
+ dst = dstLine;
+ dstLine += dstStride;
+ src = srcLine;
+ srcLine += srcStride;
+ w = width;
+
+ if(((int)src&1)==1)
+ {
+ s_16 = *src++;
+ d_16 = *dst;
+ fastCombine0565(maskAlpha, s_16, d_16, *dst++);
+ w--;
+ }
+ isrc=(CARD32 *)src;
+ while (w>1)
+ {
+ s_32=*isrc++;
+#if IMAGE_BYTE_ORDER == LSBFirst
+ s_16=s_32&0xffff;
+#else
+ s_16=s_32>>16;
+#endif
+ d_16 = *dst;
+ fastCombine0565(maskAlpha, s_16, d_16, *dst++);
+#if IMAGE_BYTE_ORDER == LSBFirst
+ s_16=s_32>>16;
+#else
+ s_16=s_32&0xffff;
+#endif
+ d_16 = *dst;
+ fastCombine0565(maskAlpha, s_16, d_16, *dst++);
+ w-=2;
+ }
+ src=(CARD16 *)isrc;
+ if(w!=0)
+ {
+ s_16 = *src;
+ d_16 = *dst;
+ fastCombine0565(maskAlpha, s_16, d_16, *dst);
+ }
+ }
+}
+
+
+
+// macros for "i can't believe it's not fast" packed pixel handling
+#define alphamaskCombine24(a,b) genericCombine24(a,b,maskAlpha,maskiAlpha)
+void
+fbCompositeTrans_0888xnx0888(CARD8 op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{
+ CARD8 *dstLine, *dst,*idst;
+ CARD8 *srcLine, *src;
+ FbStride dstStride, srcStride;
+ CARD16 w;
+ FbBits mask;
+ CARD16 maskAlpha,maskiAlpha;
+
+ fbComposeGetSolid (pMask, mask);
+ maskAlpha = mask >> 24;
+ maskiAlpha= 255-maskAlpha;
+
+ if (!maskAlpha)
+ return;
+ //if (maskAlpha == 0xff)
+ //{
+ //fbCompositeSrc_0888x0888 (op, pSrc, pMask, pDst,
+ // xSrc, ySrc, xMask, yMask, xDst, yDst,
+ // width, height);
+ //return;
+ //}
+
+ fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 3);
+ fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3);
- while (w--)
{
- s_16 = *src++;
- s_32 = cvt0565to8888(s_16);
- d_16 = *dst;
- d_32 = cvt0565to8888(d_16);
-
- i_32 = fbIn24 (s_32, maskAlpha);
- r_32 = fbOver24 (i_32, d_32);
- r_16 = cvt8888to0565(r_32);
- *dst++ = r_16;
+ unsigned int ws,wt,wd,ww;
+ CARD32 workingSource;
+ CARD32 *wsrc;
+ CARD32 rs,gs,bs;
+ CARD32 rd,gd,bd;
+
+ CARD32 workingiDest,workingoDest;
+ CARD32 *widst,*wodst;
+
+
+ // are xSrc and xDst at the same alignment? if not, we need to be complicated :)
+ //if(0==0)
+ if( (((xSrc*3)&3)!=((xDst*3)&3)) || (srcStride&3)!=0 || (dstStride&3)!=0)
+ {
+ while (height--)
+ {
+ idst=dst = dstLine;
+ dstLine += dstStride;
+ src = srcLine;
+ srcLine += srcStride;
+ w = width*3;
+
+ setupPackedReader(wd,wt,idst,widst,workingiDest);
+ ww=(int)dst;
+ wt=ww&3;
+ dst-=wt;
+ wodst=(CARD32 *)dst;
+ workingoDest=*wodst;
+ ww=4-wt;
+#if IMAGE_BYTE_ORDER == LSBFirst
+ workingoDest<<=(8*(ww+1));
+#else
+ workingoDest>>=(8*(ww+1));
+#endif
+
+ // get to word aligned
+ switch(!(int)src&3)
+ {
+ case 1:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ w--; if(w==0) break;
+ case 2:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ w--; if(w==0) break;
+ case 3:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ w--; if(w==0) break;
+ }
+ wsrc=(CARD32 *)src;
+ while (w>3)
+ {
+ rs=*wsrc++;
+ // FIXME: write a version of readPackedDest() which
+ // can collect 4 bytes at once if we're on a boundry (which we're
+ // actually guarenteed not to be in this version, but do it anyhow), and can
+ // collect as 2 16bit words on a 2byte boundry, and then use the 32bit combine here
+#if IMAGE_BYTE_ORDER == LSBFirst
+ readPackedDest(rd);
+ rd=alphamaskCombine24(rs&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24((rs>>8)&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24((rs>>16)&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24(rs>>24, rd)>>8;
+ writePacked(rd);
+#else
+ readPackedDest(rd);
+ rd=alphamaskCombine24(rs>>24, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24((rs>>16)&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24((rs>>8)&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24(rs&0xff, rd)>>8;
+ writePacked(rd);
+#endif
+ w-=4;
+ }
+ src=(CARD8 *)wsrc;
+ switch(w)
+ {
+ case 3:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ case 2:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ case 1:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ }
+ dst=(CARD8 *)wodst;
+ switch(ww)
+ {
+ case 1:
+ dst[2]=(workingoDest>>8)&0xff;
+ case 2:
+ dst[1]=(workingoDest>>16)&0xff;
+ case 3:
+ dst[0]=workingoDest>>24;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ idst=dst = dstLine;
+ dstLine += dstStride;
+ src = srcLine;
+ srcLine += srcStride;
+ w = width*3;
+ // get to word aligned
+ switch(!(int)src&3)
+ {
+ case 1:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ w--; if(w==0) break;
+ case 2:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ w--; if(w==0) break;
+ case 3:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ w--; if(w==0) break;
+ }
+ wsrc=(CARD32 *)src;
+ widst=(CARD32 *)dst;
+
+ register CARD32 t1, t2, t3, t4;
+ while(w>3)
+ {
+ rs = *wsrc++;
+ rd = *widst;
+ fastcombine32(maskAlpha, rs, rd, widst, t1, t2, t3, t4);
+ w-=4;
+ }
+ src=(CARD8 *)wsrc;
+ dst=(CARD8 *)widst;
+ switch(w)
+ {
+ case 3:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ case 2:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ case 1:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ }
+ }
+ }
}
- }
}
/*
@@ -1168,6 +1498,11 @@
if (pDst->format == pSrc->format)
func = fbCompositeTrans_0565xnx0565;
break;
+ case PICT_r8g8b8:
+ case PICT_b8g8r8:
+ if (pDst->format == pSrc->format)
+ func = fbCompositeTrans_0888xnx0888;
+ break;
}
if (func != fbCompositeGeneral)
maskRepeat = FALSE;