xf86-video-ati: Branch 'r6xx-r7xx-support' - 3 commits
Alex Deucher
agd5f at kemper.freedesktop.org
Thu Feb 12 11:54:18 PST 2009
src/r600_exa.c | 182 +++++++++++++++++++++++++++++++++++----------------------
src/radeon.h | 5 +
2 files changed, 118 insertions(+), 69 deletions(-)
New commits:
commit e22cd4011b9be437ba89bff568e7fb82b4907d99
Author: Yang Zhao <yang at yangman.ca>
Date: Thu Feb 12 14:46:53 2009 -0500
R6xx/R7xx EXA: Further optimizations to overlapping copy
A diagonally offset overlapping copy can be reduced to either a horizontal-only
or a vertical-only offset copy; whichever needs fewer copy operations is picked.
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 2cff645..8a16b7a 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -750,87 +750,110 @@ R600OverlapCopy(PixmapPtr pDst,
struct radeon_accel_state *accel_state = info->accel_state;
uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
- int i, chunk;
+ int i, hchunk, vchunk;
if (is_overlap(srcX, srcX + w, srcY, srcY + h,
dstX, dstX + w, dstY, dstY + h)) {
- /* Diagonally offset overlap is reduced to a horizontal-only offset by first
- * copying the vertically non-overlapping portion, then adjusting coordinates
+ /* Calculate height/width of non-overlapping area */
+ hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX);
+ vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY);
+
+ /* Diagonally offset overlap is reduced to either horizontal or vertical offset-only
+ * by copying a part of the non-overlapping portion, then adjusting coordinates
+ * Choose horizontal vs vertical to minimize the total number of copy operations
*/
- if (srcX != dstX) { // left/right or diagonal
- if (srcY > dstY ) { // diagonal up
- chunk = srcY - dstY;
- R600DoPrepareCopy(pScrn,
- dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
- accel_state->rop, accel_state->planemask);
- R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, chunk);
- R600DoCopy(pScrn);
-
- h = h - chunk;
- srcY = srcY + chunk;
- dstY = dstY + chunk;
- } else if (srcY < dstY) { // diagonal down
- chunk = dstY - srcY;
- R600DoPrepareCopy(pScrn,
- dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
- dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
- accel_state->rop, accel_state->planemask);
- R600AppendCopyVertex(pScrn, srcX, srcY + h - chunk, dstX, dstY + h - chunk, w, chunk);
- R600DoCopy(pScrn);
-
- h = h - chunk;
+ if (vchunk != 0 && hchunk != 0) { //diagonal
+ if ((w / hchunk) <= (h / vchunk)) { // reduce to horizontal
+ if (srcY > dstY ) { // diagonal up
+ R600DoPrepareCopy(pScrn,
+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ accel_state->rop, accel_state->planemask);
+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk);
+ R600DoCopy(pScrn);
+
+ srcY = srcY + vchunk;
+ dstY = dstY + vchunk;
+ } else { // diagonal down
+ R600DoPrepareCopy(pScrn,
+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ accel_state->rop, accel_state->planemask);
+ R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk);
+ R600DoCopy(pScrn);
+ }
+ h = h - vchunk;
+ vchunk = 0;
+ } else { //reduce to vertical
+ if (srcX > dstX ) { // diagonal left
+ R600DoPrepareCopy(pScrn,
+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ accel_state->rop, accel_state->planemask);
+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h);
+ R600DoCopy(pScrn);
+
+ srcX = srcX + hchunk;
+ dstX = dstX + hchunk;
+ } else { // diagonal right
+ R600DoPrepareCopy(pScrn,
+ dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ accel_state->rop, accel_state->planemask);
+ R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h);
+ R600DoCopy(pScrn);
+ }
+ w = w - hchunk;
+ hchunk = 0;
}
+ }
+ if (vchunk == 0) { // left/right
if (srcX < dstX) { // right
// copy right to left
- chunk = dstX - srcX;
- for (i = w; i > 0; i -= chunk) {
+ for (i = w; i > 0; i -= hchunk) {
R600DoPrepareCopy(pScrn,
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
accel_state->rop, accel_state->planemask);
- R600AppendCopyVertex(pScrn, srcX + i - chunk, srcY, dstX + i - chunk, dstY, chunk, h);
+ R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h);
R600DoCopy(pScrn);
}
} else { //left
// copy left to right
- chunk = srcX - dstX;
- for (i = 0; i < w; i += chunk) {
+ for (i = 0; i < w; i += hchunk) {
R600DoPrepareCopy(pScrn,
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
accel_state->rop, accel_state->planemask);
- R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, chunk, h);
+ R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h);
R600DoCopy(pScrn);
}
}
} else { //up/down
if (srcY > dstY) { // up
// copy top to bottom
- for (i = 0; i < h; i += chunk) {
- chunk = srcY - dstY;
+ for (i = 0; i < h; i += vchunk) {
R600DoPrepareCopy(pScrn,
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
accel_state->rop, accel_state->planemask);
- if (chunk > h - i) chunk = h - i;
- R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, chunk);
+ if (vchunk > h - i) vchunk = h - i;
+ R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk);
R600DoCopy(pScrn);
}
} else { // down
// copy bottom to top
- chunk = dstY - srcY;
- for (i = h; i > 0; i -= chunk) {
+ for (i = h; i > 0; i -= vchunk) {
R600DoPrepareCopy(pScrn,
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
accel_state->rop, accel_state->planemask);
- if (chunk > i) chunk = i;
- R600AppendCopyVertex(pScrn, srcX, srcY + i - chunk, dstX, dstY + i - chunk, w, chunk);
+ if (vchunk > i) vchunk = i;
+ R600AppendCopyVertex(pScrn, srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk);
R600DoCopy(pScrn);
}
}
commit da08b760bcf3d04d775c4440fafec10657bb1863
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Thu Feb 12 13:53:11 2009 -0500
R6xx/R7xx EXA: handle running out of vertex space in the copy path
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 1e3bd74..2cff645 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -419,6 +419,9 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8);
accel_state->src_mc_addr[0] = src_offset;
accel_state->src_pitch[0] = src_pitch;
+ accel_state->src_width[0] = src_width;
+ accel_state->src_height[0] = src_height;
+ accel_state->src_bpp[0] = src_bpp;
/* flush texture cache */
cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
@@ -486,6 +489,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8);
accel_state->dst_mc_addr = dst_offset;
accel_state->dst_pitch = dst_pitch;
+ accel_state->dst_height = dst_height;
+ accel_state->dst_bpp = dst_bpp;
cb_conf.id = 0;
cb_conf.w = accel_state->dst_pitch;
@@ -602,14 +607,25 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn,
{
RADEONInfoPtr info = RADEONPTR(pScrn);
struct radeon_accel_state *accel_state = info->accel_state;
- struct r6xx_copy_vertex *copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+ struct r6xx_copy_vertex *copy_vb;
struct r6xx_copy_vertex vertex[3];
if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
- ErrorF("Copy: Ran out of VB space!\n");
- return;
+ //ErrorF("Copy: Ran out of VB space!\n");
+ // emit the old VB
+ R600DoCopy(pScrn);
+ // start a new one
+ R600DoPrepareCopy(pScrn,
+ accel_state->src_pitch[0], accel_state->src_width[0], accel_state->src_height[0],
+ accel_state->src_mc_addr[0], accel_state->src_bpp[0],
+ accel_state->dst_pitch, accel_state->dst_height,
+ accel_state->dst_mc_addr, accel_state->dst_bpp,
+ accel_state->rop, accel_state->planemask);
+
}
+ copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+
vertex[0].x = (float)dstX;
vertex[0].y = (float)dstY;
vertex[0].s = (float)srcX;
@@ -654,6 +670,12 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst,
accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
+ accel_state->src_width[0] = pSrc->drawable.width;
+ accel_state->src_height[0] = pSrc->drawable.height;
+ accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel;
+ accel_state->dst_height = pDst->drawable.height;
+ accel_state->dst_bpp = pDst->drawable.bitsPerPixel;
+
// bad pitch
if (accel_state->src_pitch[0] & 7)
return FALSE;
@@ -680,10 +702,11 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst,
pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst));
#endif
+ accel_state->rop = rop;
+ accel_state->planemask = planemask;
+
if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) {
accel_state->same_surface = TRUE;
- accel_state->rop = rop;
- accel_state->planemask = planemask;
#ifdef SHOW_VERTEXES
ErrorF("same surface!\n");
diff --git a/src/radeon.h b/src/radeon.h
index 9b42afd..bad55bf 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -665,9 +665,14 @@ struct radeon_accel_state {
uint32_t src_size[2];
uint64_t src_mc_addr[2];
uint32_t src_pitch[2];
+ uint32_t src_width[2];
+ uint32_t src_height[2];
+ uint32_t src_bpp[2];
uint32_t dst_size;
uint64_t dst_mc_addr;
uint32_t dst_pitch;
+ uint32_t dst_height;
+ uint32_t dst_bpp;
uint32_t vs_size;
uint64_t vs_mc_addr;
uint32_t ps_size;
commit e3be312b0b73982c24f1f5d9cf76d7caafae0853
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Thu Feb 12 13:48:36 2009 -0500
R6xx/R7xx EXA: properly handle non repeat cases in the texture setup
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 8ae5b53..1e3bd74 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1077,25 +1077,21 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
tex_res.request_size = 1;
/* component swizzles */
- // XXX double check these
switch (pPict->format) {
case PICT_a1r5g5b5:
case PICT_a8r8g8b8:
- //ErrorF("%s: PICT_a8r8g8b8\n", unit ? "mask" : "src");
tex_res.dst_sel_x = SQ_SEL_Z; //R
tex_res.dst_sel_y = SQ_SEL_Y; //G
tex_res.dst_sel_z = SQ_SEL_X; //B
tex_res.dst_sel_w = SQ_SEL_W; //A
break;
case PICT_a8b8g8r8:
- //ErrorF("%s: PICT_a8b8g8r8\n", unit ? "mask" : "src");
tex_res.dst_sel_x = SQ_SEL_X; //R
tex_res.dst_sel_y = SQ_SEL_Y; //G
tex_res.dst_sel_z = SQ_SEL_Z; //B
tex_res.dst_sel_w = SQ_SEL_W; //A
break;
case PICT_x8b8g8r8:
- //ErrorF("%s: PICT_x8b8g8r8\n", unit ? "mask" : "src");
tex_res.dst_sel_x = SQ_SEL_X; //R
tex_res.dst_sel_y = SQ_SEL_Y; //G
tex_res.dst_sel_z = SQ_SEL_Z; //B
@@ -1103,21 +1099,18 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
break;
case PICT_x1r5g5b5:
case PICT_x8r8g8b8:
- //ErrorF("%s: PICT_x8r8g8b8\n", unit ? "mask" : "src");
tex_res.dst_sel_x = SQ_SEL_Z; //R
tex_res.dst_sel_y = SQ_SEL_Y; //G
tex_res.dst_sel_z = SQ_SEL_X; //B
tex_res.dst_sel_w = SQ_SEL_1; //A
break;
case PICT_r5g6b5:
- //ErrorF("%s: PICT_r5g6b5\n", unit ? "mask" : "src");
tex_res.dst_sel_x = SQ_SEL_Z; //R
tex_res.dst_sel_y = SQ_SEL_Y; //G
tex_res.dst_sel_z = SQ_SEL_X; //B
tex_res.dst_sel_w = SQ_SEL_1; //A
break;
case PICT_a8:
- //ErrorF("%s: PICT_a8\n", unit ? "mask" : "src");
tex_res.dst_sel_x = SQ_SEL_0; //R
tex_res.dst_sel_y = SQ_SEL_0; //G
tex_res.dst_sel_z = SQ_SEL_0; //B
@@ -1135,25 +1128,30 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
tex_samp.id = unit;
tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
- switch (pPict->repeatType) {
- case RepeatNormal:
- tex_samp.clamp_x = SQ_TEX_WRAP;
- tex_samp.clamp_y = SQ_TEX_WRAP;
- break;
- case RepeatPad:
- tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
- tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
- break;
- case RepeatReflect:
- tex_samp.clamp_x = SQ_TEX_MIRROR;
- tex_samp.clamp_y = SQ_TEX_MIRROR;
- break;
- case RepeatNone:
+ if (pPict->repeat) {
+ switch (pPict->repeatType) {
+ case RepeatNormal:
+ tex_samp.clamp_x = SQ_TEX_WRAP;
+ tex_samp.clamp_y = SQ_TEX_WRAP;
+ break;
+ case RepeatPad:
+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+ break;
+ case RepeatReflect:
+ tex_samp.clamp_x = SQ_TEX_MIRROR;
+ tex_samp.clamp_y = SQ_TEX_MIRROR;
+ break;
+ case RepeatNone:
+ tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
+ break;
+ default:
+ RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType));
+ }
+ } else {
tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
- break;
- default:
- RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType));
}
switch (pPict->filter) {
More information about the xorg-commit mailing list