xf86-video-ati: Branch 'r6xx-r7xx-support' - 3 commits

Alex Deucher agd5f at kemper.freedesktop.org
Thu Feb 12 11:54:18 PST 2009


 src/r600_exa.c |  182 +++++++++++++++++++++++++++++++++++----------------------
 src/radeon.h   |    5 +
 2 files changed, 118 insertions(+), 69 deletions(-)

New commits:
commit e22cd4011b9be437ba89bff568e7fb82b4907d99
Author: Yang Zhao <yang at yangman.ca>
Date:   Thu Feb 12 14:46:53 2009 -0500

    R6xx/R7xx EXA: Further optimizations to overlapping copy
    
    Diagonal overlapping copies can be reduced to either horizontal- or
    vertical-only offset, and the one with fewer copies is picked.

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 2cff645..8a16b7a 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -750,87 +750,110 @@ R600OverlapCopy(PixmapPtr pDst,
     struct radeon_accel_state *accel_state = info->accel_state;
     uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
     uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
-    int i, chunk;
+    int i, hchunk, vchunk;
 
     if (is_overlap(srcX, srcX + w, srcY, srcY + h,
 		   dstX, dstX + w, dstY, dstY + h)) {
-        /* Diagonally offset overlap is reduced to a horizontal-only offset by first
-         * copying the vertically non-overlapping portion, then adjusting coordinates
+        /* Calculate height/width of non-overlapping area */
+        hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX);
+        vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY);
+
+        /* Diagonally offset overlap is reduced to either a horizontal-only or a vertical-only
+         * offset by first copying part of the non-overlapping portion, then adjusting coordinates.
+         * Choose horizontal vs. vertical to minimize the total number of copy operations.
          */
-	if (srcX != dstX) { // left/right or diagonal
-            if (srcY > dstY ) { // diagonal up
-                chunk = srcY - dstY;
-                R600DoPrepareCopy(pScrn,
-                                  dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
-                                  dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
-                                  accel_state->rop, accel_state->planemask);
-                R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, chunk);
-                R600DoCopy(pScrn);
-
-                h = h - chunk;
-                srcY = srcY + chunk;
-                dstY = dstY + chunk;
-            } else if (srcY < dstY) { // diagonal down
-                chunk = dstY - srcY;
-                R600DoPrepareCopy(pScrn,
-                                  dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
-                                  dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
-                                  accel_state->rop, accel_state->planemask);
-                R600AppendCopyVertex(pScrn, srcX, srcY + h - chunk, dstX, dstY + h - chunk, w, chunk);
-                R600DoCopy(pScrn);
-
-                h = h - chunk;
+        if (vchunk != 0 && hchunk != 0) { //diagonal
+            if ((w / hchunk) <= (h / vchunk)) { // reduce to horizontal
+                if (srcY > dstY ) { // diagonal up
+                    R600DoPrepareCopy(pScrn,
+                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+                                      accel_state->rop, accel_state->planemask);
+                    R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk);
+                    R600DoCopy(pScrn);
+
+                    srcY = srcY + vchunk;
+                    dstY = dstY + vchunk;
+                } else { // diagonal down
+                    R600DoPrepareCopy(pScrn,
+                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+                                      accel_state->rop, accel_state->planemask);
+                    R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk);
+                    R600DoCopy(pScrn);
+                }
+                h = h - vchunk;
+                vchunk = 0;
+            } else { //reduce to vertical
+                if (srcX > dstX ) { // diagonal left
+                    R600DoPrepareCopy(pScrn,
+                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+                                      accel_state->rop, accel_state->planemask);
+                    R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h);
+                    R600DoCopy(pScrn);
+
+                    srcX = srcX + hchunk;
+                    dstX = dstX + hchunk;
+                } else { // diagonal right
+                    R600DoPrepareCopy(pScrn,
+                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+                                      accel_state->rop, accel_state->planemask);
+                    R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h);
+                    R600DoCopy(pScrn);
+                }
+                w = w - hchunk;
+                hchunk = 0;
             }
+        }
 
+	if (vchunk == 0) { // left/right
 	    if (srcX < dstX) { // right
 		// copy right to left
-                chunk = dstX - srcX;
-		for (i = w; i > 0; i -= chunk) {
+		for (i = w; i > 0; i -= hchunk) {
 		    R600DoPrepareCopy(pScrn,
 				      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 				      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 				      accel_state->rop, accel_state->planemask);
-		    R600AppendCopyVertex(pScrn, srcX + i - chunk, srcY, dstX + i - chunk, dstY, chunk, h);
+		    R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h);
 		    R600DoCopy(pScrn);
 		}
 	    } else { //left
 		// copy left to right
-                chunk = srcX - dstX;
-		for (i = 0; i < w; i += chunk) {
+		for (i = 0; i < w; i += hchunk) {
 		    R600DoPrepareCopy(pScrn,
 				      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 				      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 				      accel_state->rop, accel_state->planemask);
 
-		    R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, chunk, h);
+		    R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h);
 		    R600DoCopy(pScrn);
 		}
 	    }
 	} else { //up/down
 	    if (srcY > dstY) { // up
 		// copy top to bottom
-                for (i = 0; i < h; i += chunk) {
-                chunk = srcY - dstY;
+                for (i = 0; i < h; i += vchunk) {
                     R600DoPrepareCopy(pScrn,
                                       dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
                                       dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
                                       accel_state->rop, accel_state->planemask);
 
-                    if (chunk > h - i) chunk = h - i;
-                    R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, chunk);
+                    if (vchunk > h - i) vchunk = h - i;
+                    R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk);
                     R600DoCopy(pScrn);
                 }
 	    } else { // down
 		// copy bottom to top
-		chunk = dstY - srcY;
-                for (i = h; i > 0; i -= chunk) {
+                for (i = h; i > 0; i -= vchunk) {
                     R600DoPrepareCopy(pScrn,
                                       dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
                                       dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
                                       accel_state->rop, accel_state->planemask);
 
-                    if (chunk > i) chunk = i;
-                    R600AppendCopyVertex(pScrn, srcX, srcY + i - chunk, dstX, dstY + i - chunk, w, chunk);
+                    if (vchunk > i) vchunk = i;
+                    R600AppendCopyVertex(pScrn, srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk);
                     R600DoCopy(pScrn);
                 }
             }
commit da08b760bcf3d04d775c4440fafec10657bb1863
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Thu Feb 12 13:53:11 2009 -0500

    R6xx/R7xx EXA: handle running out of vertex space in the copy path

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 1e3bd74..2cff645 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -419,6 +419,9 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
     accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8);
     accel_state->src_mc_addr[0] = src_offset;
     accel_state->src_pitch[0] = src_pitch;
+    accel_state->src_width[0] = src_width;
+    accel_state->src_height[0] = src_height;
+    accel_state->src_bpp[0] = src_bpp;
 
     /* flush texture cache */
     cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
@@ -486,6 +489,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
     accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8);
     accel_state->dst_mc_addr = dst_offset;
     accel_state->dst_pitch = dst_pitch;
+    accel_state->dst_height = dst_height;
+    accel_state->dst_bpp = dst_bpp;
 
     cb_conf.id = 0;
     cb_conf.w = accel_state->dst_pitch;
@@ -602,14 +607,25 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn,
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
-    struct r6xx_copy_vertex *copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+    struct r6xx_copy_vertex *copy_vb;
     struct r6xx_copy_vertex vertex[3];
 
     if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
-	ErrorF("Copy: Ran out of VB space!\n");
-	return;
+	//ErrorF("Copy: Ran out of VB space!\n");
+	// emit the old VB
+	R600DoCopy(pScrn);
+	// start a new one
+	R600DoPrepareCopy(pScrn,
+			  accel_state->src_pitch[0], accel_state->src_width[0], accel_state->src_height[0],
+			  accel_state->src_mc_addr[0], accel_state->src_bpp[0],
+			  accel_state->dst_pitch, accel_state->dst_height,
+			  accel_state->dst_mc_addr, accel_state->dst_bpp,
+			  accel_state->rop, accel_state->planemask);
+
     }
 
+    copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+
     vertex[0].x = (float)dstX;
     vertex[0].y = (float)dstY;
     vertex[0].s = (float)srcX;
@@ -654,6 +670,12 @@ R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
     accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
     accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
 
+    accel_state->src_width[0] = pSrc->drawable.width;
+    accel_state->src_height[0] = pSrc->drawable.height;
+    accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel;
+    accel_state->dst_height = pDst->drawable.height;
+    accel_state->dst_bpp = pDst->drawable.bitsPerPixel;
+
     // bad pitch
     if (accel_state->src_pitch[0] & 7)
 	return FALSE;
@@ -680,10 +702,11 @@ R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
 	   pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst));
 #endif
 
+    accel_state->rop = rop;
+    accel_state->planemask = planemask;
+
     if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) {
 	accel_state->same_surface = TRUE;
-	accel_state->rop = rop;
-	accel_state->planemask = planemask;
 
 #ifdef SHOW_VERTEXES
 	ErrorF("same surface!\n");
diff --git a/src/radeon.h b/src/radeon.h
index 9b42afd..bad55bf 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -665,9 +665,14 @@ struct radeon_accel_state {
     uint32_t          src_size[2];
     uint64_t          src_mc_addr[2];
     uint32_t          src_pitch[2];
+    uint32_t          src_width[2];
+    uint32_t          src_height[2];
+    uint32_t          src_bpp[2];
     uint32_t          dst_size;
     uint64_t          dst_mc_addr;
     uint32_t          dst_pitch;
+    uint32_t          dst_height;
+    uint32_t          dst_bpp;
     uint32_t          vs_size;
     uint64_t          vs_mc_addr;
     uint32_t          ps_size;
commit e3be312b0b73982c24f1f5d9cf76d7caafae0853
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Thu Feb 12 13:48:36 2009 -0500

    R6xx/R7xx EXA: properly handle non repeat cases in the texture setup

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 8ae5b53..1e3bd74 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1077,25 +1077,21 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     tex_res.request_size        = 1;
 
     /* component swizzles */
-    // XXX double check these
     switch (pPict->format) {
     case PICT_a1r5g5b5:
     case PICT_a8r8g8b8:
-	//ErrorF("%s: PICT_a8r8g8b8\n", unit ? "mask" : "src");
 	tex_res.dst_sel_x           = SQ_SEL_Z; //R
 	tex_res.dst_sel_y           = SQ_SEL_Y; //G
 	tex_res.dst_sel_z           = SQ_SEL_X; //B
 	tex_res.dst_sel_w           = SQ_SEL_W; //A
 	break;
     case PICT_a8b8g8r8:
-	//ErrorF("%s: PICT_a8b8g8r8\n", unit ? "mask" : "src");
 	tex_res.dst_sel_x           = SQ_SEL_X; //R
 	tex_res.dst_sel_y           = SQ_SEL_Y; //G
 	tex_res.dst_sel_z           = SQ_SEL_Z; //B
 	tex_res.dst_sel_w           = SQ_SEL_W; //A
 	break;
     case PICT_x8b8g8r8:
-	//ErrorF("%s: PICT_x8b8g8r8\n", unit ? "mask" : "src");
 	tex_res.dst_sel_x           = SQ_SEL_X; //R
 	tex_res.dst_sel_y           = SQ_SEL_Y; //G
 	tex_res.dst_sel_z           = SQ_SEL_Z; //B
@@ -1103,21 +1099,18 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
 	break;
     case PICT_x1r5g5b5:
     case PICT_x8r8g8b8:
-	//ErrorF("%s: PICT_x8r8g8b8\n", unit ? "mask" : "src");
 	tex_res.dst_sel_x           = SQ_SEL_Z; //R
 	tex_res.dst_sel_y           = SQ_SEL_Y; //G
 	tex_res.dst_sel_z           = SQ_SEL_X; //B
 	tex_res.dst_sel_w           = SQ_SEL_1; //A
 	break;
     case PICT_r5g6b5:
-	//ErrorF("%s: PICT_r5g6b5\n", unit ? "mask" : "src");
 	tex_res.dst_sel_x           = SQ_SEL_Z; //R
 	tex_res.dst_sel_y           = SQ_SEL_Y; //G
 	tex_res.dst_sel_z           = SQ_SEL_X; //B
 	tex_res.dst_sel_w           = SQ_SEL_1; //A
 	break;
     case PICT_a8:
-	//ErrorF("%s: PICT_a8\n", unit ? "mask" : "src");
 	tex_res.dst_sel_x           = SQ_SEL_0; //R
 	tex_res.dst_sel_y           = SQ_SEL_0; //G
 	tex_res.dst_sel_z           = SQ_SEL_0; //B
@@ -1135,25 +1128,30 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     tex_samp.id                 = unit;
     tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
 
-    switch (pPict->repeatType) {
-    case RepeatNormal:
-	tex_samp.clamp_x            = SQ_TEX_WRAP;
-	tex_samp.clamp_y            = SQ_TEX_WRAP;
-	break;
-    case RepeatPad:
-	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
-	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
-	break;
-    case RepeatReflect:
-	tex_samp.clamp_x            = SQ_TEX_MIRROR;
-	tex_samp.clamp_y            = SQ_TEX_MIRROR;
-	break;
-    case RepeatNone:
+    if (pPict->repeat) {
+	switch (pPict->repeatType) {
+	case RepeatNormal:
+	    tex_samp.clamp_x            = SQ_TEX_WRAP;
+	    tex_samp.clamp_y            = SQ_TEX_WRAP;
+	    break;
+	case RepeatPad:
+	    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
+	    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
+	    break;
+	case RepeatReflect:
+	    tex_samp.clamp_x            = SQ_TEX_MIRROR;
+	    tex_samp.clamp_y            = SQ_TEX_MIRROR;
+	    break;
+	case RepeatNone:
+	    tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
+	    tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
+	    break;
+	default:
+	    RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType));
+	}
+    } else {
 	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
 	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
-	break;
-    default:
-	RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType));
     }
 
     switch (pPict->filter) {


More information about the xorg-commit mailing list