Mesa (master): r600: add initial blit support

Alex Deucher agd5f at kemper.freedesktop.org
Sat Jan 16 04:11:06 UTC 2010


Module: Mesa
Branch: master
Commit: 7283a246ce659e69ea9931098a8076848be4b9da
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7283a246ce659e69ea9931098a8076848be4b9da

Author: Andre Maasikas <amaasikas at gmail.com>
Date:   Fri Jan 15 21:23:31 2010 -0500

r600: add initial blit support

---

 src/mesa/drivers/dri/r600/Makefile            |    2 +
 src/mesa/drivers/dri/r600/r600_blit.c         |  797 +++++++++++++++----------
 src/mesa/drivers/dri/r600/r600_blit.h         |   21 +
 src/mesa/drivers/dri/r600/r600_blit_shaders.h |   28 +
 src/mesa/drivers/dri/r600/r600_context.c      |    4 +
 src/mesa/drivers/dri/r600/r600_context.h      |    3 +
 src/mesa/drivers/dri/r600/r600_texcopy.c      |  169 ++++++
 7 files changed, 701 insertions(+), 323 deletions(-)

diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index 26f47b7..b90efce 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -59,6 +59,8 @@ DRIVER_SOURCES = \
 		 r700_render.c   \
 		 r600_tex.c      \
 		 r600_texstate.c      \
+		 r600_blit.c     \
+		 r600_texcopy.c  \
 		 r700_debug.c    \
 		 $(RADEON_COMMON_SOURCES) \
 		 $(EGL_SOURCES) \
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
index fbca088..6ea1a07 100644
--- a/src/mesa/drivers/dri/r600/r600_blit.c
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -29,22 +29,85 @@
 #include "r600_context.h"
 
 #include "r600_blit.h"
+#include "r600_blit_shaders.h"
 #include "r600_cmdbuf.h"
 
+static uint32_t mesa_format_to_cb_format(gl_format mesa_format)
+{
+    uint32_t comp_swap, format, color_info = 0;
+    //fprintf(stderr,"format for copy %s\n",_mesa_get_format_name(mesa_format));
+    switch(mesa_format) {
+        /* XXX check and add these */
+        case MESA_FORMAT_RGBA8888:
+            comp_swap = SWAP_STD_REV;
+            format = FMT_8_8_8_8;
+            break;
+        case MESA_FORMAT_RGBA8888_REV:
+            comp_swap = SWAP_STD;
+            format = FMT_8_8_8_8;
+            break;
+        case MESA_FORMAT_ARGB8888:
+        case MESA_FORMAT_XRGB8888:
+            comp_swap = SWAP_ALT;
+            format = FMT_8_8_8_8;
+            break;
+        case MESA_FORMAT_ARGB8888_REV:
+            comp_swap = SWAP_ALT_REV;
+            format = FMT_8_8_8_8;
+            break;
+        case MESA_FORMAT_RGB565:
+            comp_swap = SWAP_ALT_REV;
+            format = FMT_5_6_5;
+            break;
+        case MESA_FORMAT_ARGB1555:
+            comp_swap = SWAP_ALT_REV;
+            format = FMT_1_5_5_5;
+            break;   
+        case MESA_FORMAT_ARGB4444:
+            comp_swap = SWAP_ALT;
+            format = FMT_4_4_4_4;
+            break;
+        case MESA_FORMAT_S8_Z24:
+            format = FMT_8_24;
+            comp_swap = SWAP_STD;
+            break;
+        default:
+            fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format));
+            assert("Invalid format for US output\n");
+            return 0;
+    }
+
+    SETfield(color_info, format, CB_COLOR0_INFO__FORMAT_shift,
+             CB_COLOR0_INFO__FORMAT_mask);
+    SETfield(color_info, comp_swap, COMP_SWAP_shift, COMP_SWAP_mask);
+
+    return color_info;
+
+}
+
 static inline void
-set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
+set_render_target(context_t *context, struct radeon_bo *bo, gl_format format,
+                  int pitch, int bpp, int w, int h, intptr_t dst_offset)
 {
-    int id = 0;
+    uint32_t cb_color0_base, cb_color0_size = 0, cb_color0_info = 0, cb_color0_view;
+    int nPitchInPixel, id = 0;
+    BATCH_LOCALS(&context->radeon);
+
+    cb_color0_base = dst_offset / 256;
+    cb_color0_view = 0;
 
-    nPitchInPixel = rrb->pitch/rrb->cpp;
+    cb_color0_info = mesa_format_to_cb_format(format);
+
+    nPitchInPixel = pitch/bpp;
     SETfield(cb_color0_size, (nPitchInPixel / 8) - 1,
              PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
-    SETfield(cb_color0_size, ((nPitchInPixel * height) / 64) - 1,
+    SETfield(cb_color0_size, ((nPitchInPixel * h) / 64) - 1,
              SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
     SETfield(cb_color0_info, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
     SETfield(cb_color0_info, ARRAY_LINEAR_GENERAL,
-             cb_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
-    if(4 == rrb->cpp)
+             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+/*  
+    if(4 == bpp)
     {
         SETfield(cb_color0_info, COLOR_8_8_8_8,
                  CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
@@ -57,13 +120,15 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64
         SETfield(cb_color0_info, SWAP_ALT_REV,
                  COMP_SWAP_shift, COMP_SWAP_mask);
     }
+*/
     SETbit(cb_color0_info, SOURCE_FORMAT_bit);
-    SETbit(cb_color0_info, BLEND_CLAMP_bit);
+    //SETbit(cb_color0_info, BLEND_CLAMP_bit);
+    SETbit(cb_color0_info, BLEND_BYPASS_bit);
     SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
 
     BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
     R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1);
-    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(cb_color0_base);
     R600_OUT_BATCH_RELOC(0,
 			 bo,
 			 0,
@@ -91,68 +156,116 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64
 
 }
 
+static inline void load_shaders(GLcontext * ctx)
+{
+
+    radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
+    context_t *context = R700_CONTEXT(ctx);
+    int i, size; 
+    uint32_t *shader;
+
+    if (context->blit_bo_loaded == 1)
+        return;
+
+    size = 4096;
+    context->blit_bo = radeon_bo_open(radeonctx->radeonScreen->bom, 0, 
+                                      size, 256, RADEON_GEM_DOMAIN_GTT, 0);
+    radeon_bo_map(context->blit_bo, 1);
+    shader = context->blit_bo->ptr;
+
+    for(i=0; i<sizeof(r6xx_vs)/4; i++) {
+        shader[128+i] = r6xx_vs[i];
+    }
+    for(i=0; i<sizeof(r6xx_ps)/4; i++) {
+        shader[256+i] = r6xx_ps[i];
+    }
+
+    radeon_bo_unmap(context->blit_bo);
+    context->blit_bo_loaded = 1;
+
+}
+
 static inline void
-set_shaders(struct drm_device *dev)
+set_shaders(context_t *context)
 {
-    /* FS */
-    pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
-    r700->fs.SQ_PGM_START_FS.u32All = r700->vs.SQ_PGM_START_VS.u32All;
-    r700->fs.SQ_PGM_RESOURCES_FS.u32All = 0;
-    r700->fs.SQ_PGM_CF_OFFSET_FS.u32All = 0;
-    /* XXX */
+    struct radeon_bo * pbo = context->blit_bo;
+    BATCH_LOCALS(&context->radeon);
+
+    uint32_t sq_pgm_start_fs = (512 >> 8);
+    uint32_t sq_pgm_resources_fs = 0;
+    uint32_t sq_pgm_cf_offset_fs = 0;
+
+    uint32_t sq_pgm_start_vs = (512 >> 8);
+    uint32_t sq_pgm_resources_vs = (1 << NUM_GPRS_shift);
+    uint32_t sq_pgm_cf_offset_vs = 0;
+
+    uint32_t sq_pgm_start_ps = (1024 >> 8);
+    uint32_t sq_pgm_resources_ps = (1 << NUM_GPRS_shift);
+    uint32_t sq_pgm_cf_offset_ps = 0;
+    uint32_t sq_pgm_exports_ps = (1 << 1);
 
-    if (!pbo)
-	    return;
+    radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
+                                  pbo,
+                                  RADEON_GEM_DOMAIN_GTT, 0);
 
     r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
 
+    /* FS */
+
     BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
     R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1);
-    R600_OUT_BATCH(r700->fs.SQ_PGM_START_FS.u32All);
-    R600_OUT_BATCH_RELOC(r700->fs.SQ_PGM_START_FS.u32All,
+    R600_OUT_BATCH(sq_pgm_start_fs);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_fs,
 			 pbo,
-			 r700->fs.SQ_PGM_START_FS.u32All,
+			 sq_pgm_start_fs,
 			 RADEON_GEM_DOMAIN_GTT, 0, 0);
     END_BATCH();
 
     BEGIN_BATCH_NO_AUTOSTATE(6);
-    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, r700->fs.SQ_PGM_RESOURCES_FS.u32All);
-    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, r700->fs.SQ_PGM_CF_OFFSET_FS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, sq_pgm_resources_fs);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, sq_pgm_cf_offset_fs);
     END_BATCH();
 
     /* VS */
-    r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
-
+    
     BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
     R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
-    R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
-    R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
+    R600_OUT_BATCH(sq_pgm_start_vs);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_vs,
 		         pbo,
-		         r700->vs.SQ_PGM_START_VS.u32All,
+		         sq_pgm_start_vs,
 		         RADEON_GEM_DOMAIN_GTT, 0, 0);
     END_BATCH();
 
     BEGIN_BATCH_NO_AUTOSTATE(6);
-    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
-    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, sq_pgm_resources_vs);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, sq_pgm_cf_offset_vs);
     END_BATCH();
 
     /* PS */
-    r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
 
     BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
     R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
-    R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
-    R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
+    R600_OUT_BATCH(sq_pgm_start_ps);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_ps,
 		         pbo,
-		         r700->ps.SQ_PGM_START_PS.u32All,
+		         sq_pgm_start_ps,
 		         RADEON_GEM_DOMAIN_GTT, 0, 0);
     END_BATCH();
 
     BEGIN_BATCH_NO_AUTOSTATE(9);
-    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
-    R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
-    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, sq_pgm_resources_ps);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, sq_pgm_exports_ps);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, sq_pgm_cf_offset_ps);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(18);
+    R600_OUT_BATCH_REGVAL(SPI_VS_OUT_CONFIG, 0); //EXPORT_COUNT is - 1
+    R600_OUT_BATCH_REGVAL(SPI_VS_OUT_ID_0, 0); 
+    R600_OUT_BATCH_REGVAL(SPI_PS_INPUT_CNTL_0, SEL_CENTROID_bit);
+    R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
+    R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_1, 0);
+    R600_OUT_BATCH_REGVAL(SPI_INTERP_CONTROL_0, 0);
     END_BATCH();
 
     COMMIT_BATCH();
@@ -160,8 +273,10 @@ set_shaders(struct drm_device *dev)
 }
 
 static inline void
-set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
+set_vtx_resource(context_t *context)
 {
+    struct radeon_bo *bo = context->blit_bo;
+    BATCH_LOCALS(&context->radeon);
 
     BEGIN_BATCH_NO_AUTOSTATE(6);
     R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
@@ -194,49 +309,109 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
     R600_OUT_BATCH(0);
     R600_OUT_BATCH(0);
     R600_OUT_BATCH(SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift);
-    R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0,
-                         paos->bo,
-                         uSQ_VTX_CONSTANT_WORD0_0,
+    R600_OUT_BATCH_RELOC(SQ_VTX_CONSTANT_WORD0_0,
+                         bo,
+                         SQ_VTX_CONSTANT_WORD0_0,
                          RADEON_GEM_DOMAIN_GTT, 0, 0);
     END_BATCH();
     COMMIT_BATCH();
 
+    radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
+                                  bo,
+                                  RADEON_GEM_DOMAIN_GTT, 0);
+
 }
 
-static inline void
-set_tex_resource(drm_radeon_private_t *dev_priv,
-		 int format, int w, int h, int pitch, u64 gpu_addr)
+static void
+set_tex_resource(context_t * context,
+		 gl_format format, struct radeon_bo *bo, int w, int h,
+		 int pitch, intptr_t src_offset)
 {
-	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
-	RING_LOCALS;
-	DRM_DEBUG("\n");
-
-	r700SyncSurf(context, bo,
-		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
-		     0, TC_ACTION_ENA_bit);
-
-	BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
-	R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-	R600_OUT_BATCH(i * 7);
-
-	R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
-	R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
-	R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
-	R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3);
-	R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
-	R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
-	R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
-	R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
-			     bo,
-			     offset,
-			     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-	R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
-			     bo,
-			     r700->textures[i]->SQ_TEX_RESOURCE3,
-			     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-	END_BATCH();
-	COMMIT_BATCH();
+    uint32_t sq_tex_resource0, sq_tex_resource1, sq_tex_resource2, sq_tex_resource4, sq_tex_resource6;
+    int bpp = _mesa_get_format_bytes(format);
+    int TexelPitch = pitch/bpp;
+    // int tex_format = mesa_format_to_us_format(format);
+
+    sq_tex_resource0 = sq_tex_resource1 = sq_tex_resource2 = sq_tex_resource4 = sq_tex_resource6 = 0;
+    BATCH_LOCALS(&context->radeon);
+        
+    SETfield(sq_tex_resource0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask);
+    SETfield(sq_tex_resource0, ARRAY_LINEAR_GENERAL,
+                 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+                 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+
+    if (bpp == 4) {
+        SETfield(sq_tex_resource1, FMT_8_8_8_8,
+                 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift,
+                 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+        SETfield(sq_tex_resource4, SQ_SEL_Z,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+        SETfield(sq_tex_resource4, SQ_SEL_Y,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+        SETfield(sq_tex_resource4, SQ_SEL_X,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+        SETfield(sq_tex_resource4, SQ_SEL_W,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+    } else {
+        SETfield(sq_tex_resource1, FMT_5_6_5,
+                 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift,
+                 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+        SETfield(sq_tex_resource4, SQ_SEL_Z,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+        SETfield(sq_tex_resource4, SQ_SEL_Y,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+        SETfield(sq_tex_resource4, SQ_SEL_X,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+        SETfield(sq_tex_resource4, SQ_SEL_1,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift,
+                 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+
+    }
+
+    SETfield(sq_tex_resource0, (TexelPitch/8)-1, PITCH_shift, PITCH_mask);
+    SETfield(sq_tex_resource0, w - 1, TEX_WIDTH_shift, TEX_WIDTH_mask);
+    SETfield(sq_tex_resource1, h - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+
+    sq_tex_resource2 = src_offset / 256;
+
+    SETfield(sq_tex_resource6, SQ_TEX_VTX_VALID_TEXTURE,
+             SQ_TEX_RESOURCE_WORD6_0__TYPE_shift,
+             SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
 
+    r700SyncSurf(context, bo,
+                 RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+		 0, TC_ACTION_ENA_bit);
+
+    BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+    R600_OUT_BATCH(0 * 7);
+
+    R600_OUT_BATCH(sq_tex_resource0);
+    R600_OUT_BATCH(sq_tex_resource1);
+    R600_OUT_BATCH(sq_tex_resource2);
+    R600_OUT_BATCH(0); //SQ_TEX_RESOURCE3
+    R600_OUT_BATCH(sq_tex_resource4);
+    R600_OUT_BATCH(0); //SQ_TEX_RESOURCE5
+    R600_OUT_BATCH(sq_tex_resource6);
+    R600_OUT_BATCH_RELOC(0,
+		     bo,
+		     0,
+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+    R600_OUT_BATCH_RELOC(0,
+		     bo,
+		     0,
+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+/*
 	if (h < 1)
 		h = 1;
 
@@ -253,251 +428,203 @@ set_tex_resource(drm_radeon_private_t *dev_priv,
 				 (2 << 22) |
 				 (3 << 25));
 
-	BEGIN_RING(9);
-	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
-	OUT_RING(0);
-	OUT_RING(sq_tex_resource_word0);
-	OUT_RING(sq_tex_resource_word1);
-	OUT_RING(gpu_addr >> 8);
-	OUT_RING(gpu_addr >> 8);
-	OUT_RING(sq_tex_resource_word4);
-	OUT_RING(0);
-	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
-	ADVANCE_RING();
+*/
+}
+
+static void
+set_tex_sampler(context_t * context)
+{
+    uint32_t sq_tex_sampler_word0 = 0, sq_tex_sampler_word1 = 0, sq_tex_sampler_word2 = 0;
+    int i = 0;
+    
+    SETbit(sq_tex_sampler_word2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
+    
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(5);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+    R600_OUT_BATCH(i * 3);
+    R600_OUT_BATCH(sq_tex_sampler_word0);
+    R600_OUT_BATCH(sq_tex_sampler_word1);
+    R600_OUT_BATCH(sq_tex_sampler_word2);
+    END_BATCH();
 
 }
 
 static inline void
-set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
+set_scissors(context_t *context, int x1, int y1, int x2, int y2)
 {
-	RING_LOCALS;
-	DRM_DEBUG("\n");
-
-	BEGIN_BATCH_NO_AUTOSTATE(22);
-	R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
-	R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_TL.u32All);
-	R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_BR.u32All);
-
-	R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 12);
-	R600_OUT_BATCH(r700->PA_SC_WINDOW_OFFSET.u32All);
-	R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_TL.u32All);
-	R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_BR.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_RULE.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_TL.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_BR.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_TL.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_BR.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_TL.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_BR.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_TL.u32All);
-	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_BR.u32All);
-
-	R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2);
-	R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_TL.u32All);
-	R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_BR.u32All);
-	END_BATCH();
-	COMMIT_BATCH();
-
-
-	BEGIN_RING(12);
-	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
-	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
-	OUT_RING((x1 << 0) | (y1 << 16));
-	OUT_RING((x2 << 0) | (y2 << 16));
-
-	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
-	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
-	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
-	OUT_RING((x2 << 0) | (y2 << 16));
-
-	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
-	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
-	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
-	OUT_RING((x2 << 0) | (y2 << 16));
-	ADVANCE_RING();
+	
+    int i;
+    uint32_t clip_tl = 0, clip_br = 0;
+
+    SETfield(clip_tl, 0, PA_SC_CLIPRECT_0_TL__TL_X_shift,
+                         PA_SC_CLIPRECT_0_TL__TL_X_mask);
+    SETfield(clip_tl, 0, PA_SC_CLIPRECT_0_TL__TL_Y_shift,
+                         PA_SC_CLIPRECT_0_TL__TL_Y_mask);
+    SETfield(clip_br, 8191, PA_SC_CLIPRECT_0_BR__BR_X_shift,
+                            PA_SC_CLIPRECT_0_BR__BR_X_mask);
+    SETfield(clip_br, 8191, PA_SC_CLIPRECT_0_BR__BR_Y_shift,
+                            PA_SC_CLIPRECT_0_BR__BR_Y_mask);
+
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(13);
+    R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
+    R600_OUT_BATCH((x1 << 0) | (y1 << 16));
+    R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+
+    R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 3);
+    R600_OUT_BATCH(0); //PA_SC_WINDOW_OFFSET
+    R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); //PA_SC_WINDOW_SCISSOR_TL
+    R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+
+/* clip disabled ?
+    R600_OUT_BATCH(CLIP_RULE_mask); //PA_SC_CLIPRECT_RULE);
+    R600_OUT_BATCH(clip_tl); //PA_SC_CLIPRECT_0_TL
+    R600_OUT_BATCH(clip_br); //PA_SC_CLIPRECT_0_BR
+    R600_OUT_BATCH(clip_tl); // 1
+    R600_OUT_BATCH(clip_br);
+    R600_OUT_BATCH(clip_tl); // 2
+    R600_OUT_BATCH(clip_br);
+    R600_OUT_BATCH(clip_tl); // 3
+    R600_OUT_BATCH(clip_br);
+*/
+
+    R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2);
+    R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit));
+    R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+    END_BATCH();
+
+    /* XXX 16 of these PA_SC_VPORT_SCISSOR_0_TL_num ... */
+    BEGIN_BATCH_NO_AUTOSTATE(4);
+    R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL, 2 );
+    R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit));
+    R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+    END_BATCH();
+
+/*
+    BEGIN_BATCH_NO_AUTOSTATE(2 + 2 * PA_SC_VPORT_ZMIN_0_num);
+    R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_ZMIN_0, 2 * PA_SC_VPORT_ZMIN_0_num);
+    for(i = 0 i < PA_SC_VPORT_ZMIN_0_num; i++)
+        R600_OUT_BATCH(radeonPackFloat32(0.0F));
+        R600_OUT_BATCH(radeonPackFloat32(1.0F));
+    } 
+    END_BATCH();
+*/
+    COMMIT_BATCH();
+
 }
 
-static inline void
-draw_auto(drm_radeon_private_t *dev_priv)
+static void 
+set_vb_data(context_t * context, int src_x, int src_y, int dst_x, int dst_y,
+            int w, int h, int src_h, unsigned flip_y)
 {
-	RING_LOCALS;
-	DRM_DEBUG("\n");
+    float *vb;
+    radeon_bo_map(context->blit_bo, 1);
+    vb = context->blit_bo->ptr;
 
-	BEGIN_RING(10);
-	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
-	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
-	OUT_RING(DI_PT_RECTLIST);
+    vb[0] = (float)(dst_x);
+    vb[1] = (float)(dst_y);
+    vb[2] = (float)(src_x);
+    vb[3] = (flip_y) ? (float)(src_h - src_y) : (float)src_y;
 
-	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
-	OUT_RING(DI_INDEX_SIZE_16_BIT);
+    vb[4] = (float)(dst_x);
+    vb[5] = (float)(dst_y + h);
+    vb[6] = (float)(src_x);
+    vb[7] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h);
 
-	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
-	OUT_RING(1);
+    vb[8] = (float)(dst_x + w);
+    vb[9] = (float)(dst_y + h);
+    vb[10] = (float)(src_x + w);
+    vb[11] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h);
 
-	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
-	OUT_RING(3);
-	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
+    radeon_bo_unmap(context->blit_bo);
 
-	ADVANCE_RING();
-	COMMIT_RING();
 }
 
 static inline void
-set_default_state(drm_radeon_private_t *dev_priv)
+draw_auto(context_t *context)
 {
-	int i;
-	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
-	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
-	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
-	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
-	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
-	RING_LOCALS;
-
-	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
-	case CHIP_R600:
-		num_ps_gprs = 192;
-		num_vs_gprs = 56;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 136;
-		num_vs_threads = 48;
-		num_gs_threads = 4;
-		num_es_threads = 4;
-		num_ps_stack_entries = 128;
-		num_vs_stack_entries = 128;
-		num_gs_stack_entries = 0;
-		num_es_stack_entries = 0;
-		break;
-	case CHIP_RV630:
-	case CHIP_RV635:
-		num_ps_gprs = 84;
-		num_vs_gprs = 36;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 144;
-		num_vs_threads = 40;
-		num_gs_threads = 4;
-		num_es_threads = 4;
-		num_ps_stack_entries = 40;
-		num_vs_stack_entries = 40;
-		num_gs_stack_entries = 32;
-		num_es_stack_entries = 16;
-		break;
-	case CHIP_RV610:
-	case CHIP_RV620:
-	case CHIP_RS780:
-	case CHIP_RS880:
-	default:
-		num_ps_gprs = 84;
-		num_vs_gprs = 36;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 136;
-		num_vs_threads = 48;
-		num_gs_threads = 4;
-		num_es_threads = 4;
-		num_ps_stack_entries = 40;
-		num_vs_stack_entries = 40;
-		num_gs_stack_entries = 32;
-		num_es_stack_entries = 16;
-		break;
-	case CHIP_RV670:
-		num_ps_gprs = 144;
-		num_vs_gprs = 40;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 136;
-		num_vs_threads = 48;
-		num_gs_threads = 4;
-		num_es_threads = 4;
-		num_ps_stack_entries = 40;
-		num_vs_stack_entries = 40;
-		num_gs_stack_entries = 32;
-		num_es_stack_entries = 16;
-		break;
-	case CHIP_RV770:
-		num_ps_gprs = 192;
-		num_vs_gprs = 56;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 188;
-		num_vs_threads = 60;
-		num_gs_threads = 0;
-		num_es_threads = 0;
-		num_ps_stack_entries = 256;
-		num_vs_stack_entries = 256;
-		num_gs_stack_entries = 0;
-		num_es_stack_entries = 0;
-		break;
-	case CHIP_RV730:
-	case CHIP_RV740:
-		num_ps_gprs = 84;
-		num_vs_gprs = 36;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 188;
-		num_vs_threads = 60;
-		num_gs_threads = 0;
-		num_es_threads = 0;
-		num_ps_stack_entries = 128;
-		num_vs_stack_entries = 128;
-		num_gs_stack_entries = 0;
-		num_es_stack_entries = 0;
-		break;
-	case CHIP_RV710:
-		num_ps_gprs = 192;
-		num_vs_gprs = 56;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 144;
-		num_vs_threads = 48;
-		num_gs_threads = 0;
-		num_es_threads = 0;
-		num_ps_stack_entries = 128;
-		num_vs_stack_entries = 128;
-		num_gs_stack_entries = 0;
-		num_es_stack_entries = 0;
-		break;
-	}
+    BATCH_LOCALS(&context->radeon);
+    uint32_t vgt_primitive_type = 0, vgt_index_type = 0, vgt_draw_initiator = 0, vgt_num_indices;
+ 
+    SETfield(vgt_primitive_type, DI_PT_RECTLIST,
+             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift,
+             VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+    SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift,
+             INDEX_TYPE_mask);
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift,
+             MAJOR_MODE_mask);
+    SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift,
+             SOURCE_SELECT_mask);
+
+    vgt_num_indices = 3;
+
+    BEGIN_BATCH_NO_AUTOSTATE(10);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(vgt_index_type);
+    // num instances
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    //
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+    R600_OUT_BATCH(vgt_num_indices);
+    R600_OUT_BATCH(vgt_draw_initiator);
+
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+static inline void
+set_default_state(context_t *context)
+{
+    int i;
+    BATCH_LOCALS(&context->radeon);
+/*    
+    BEGIN_BATCH_NO_AUTOSTATE(sizeof(r7xx_default_state)/4);
+    for(i=0; i< sizeof(r7xx_default_state)/4; i++)
+        R600_OUT_BATCH(r7xx_default_state[i]);
+    END_BATCH();
+*/
+    BEGIN_BATCH_NO_AUTOSTATE(45);
+    R600_OUT_BATCH_REGVAL(CB_CLRCMP_CONTROL, 
+                         (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
+    R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
+    R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
+    R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
+    R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
+    R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
+    R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
+    R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+    R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, (0xcc << ROP3_shift));
 
-	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
-	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
-	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
-	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
-	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
-		sq_config = 0;
-	else
-		sq_config = R600_VC_ENABLE;
-
-	sq_config |= (R600_DX9_CONSTS |
-		      R600_ALU_INST_PREFER_VECTOR |
-		      R600_PS_PRIO(0) |
-		      R600_VS_PRIO(1) |
-		      R600_GS_PRIO(2) |
-		      R600_ES_PRIO(3));
-
-	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
-				  R600_NUM_VS_GPRS(num_vs_gprs) |
-				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
-	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
-				  R600_NUM_ES_GPRS(num_es_gprs));
-	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
-				   R600_NUM_VS_THREADS(num_vs_threads) |
-				   R600_NUM_GS_THREADS(num_gs_threads) |
-				   R600_NUM_ES_THREADS(num_es_threads));
-	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
-				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
-	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
-				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
-
-	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
+/*
+    R600_OUT_BATCH_REGSEQ(PA_CL_VPORT_XSCALE_0, 6);
+    R600_OUT_BATCH(0.0f); // PA_CL_VPORT_XSCALE
+    R600_OUT_BATCH(0.0f);      // PA_CL_VPORT_XOFFSET
+    R600_OUT_BATCH(0.0f);      // PA_CL_VPORT_YSCALE
+    R600_OUT_BATCH(0.0f);      // PA_CL_VPORT_YOFFSET
+    R600_OUT_BATCH(0.0f);      // PA_CL_VPORT_ZSCALE
+    R600_OUT_BATCH(0.0f);      // PA_CL_VPORT_ZOFFSET
+*/
+    R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+    R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, 0 );
+    R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+    R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, (FACE_bit) |
+        (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+        (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)); 
+    R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) |
+        (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+        (X_1_256TH << QUANT_MODE_shift));
+    R600_OUT_BATCH_REGVAL(VGT_MAX_VTX_INDX, 2048);
+    END_BATCH();
+/*
+       if (dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
 		BEGIN_RING(r7xx_default_size + 10);
 		for (i = 0; i < r7xx_default_size; i++)
 			OUT_RING(r7xx_default_state[i]);
@@ -508,7 +635,15 @@ set_default_state(drm_radeon_private_t *dev_priv)
 	}
 	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
 	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+
+	//OR :
+	r700WaitForIdleClean(context_t *context);
+*/
 	/* SQ config */
+	/*XXX*/
+/*
+     r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom);
+	// OR:
 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
 	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
 	OUT_RING(sq_config);
@@ -518,21 +653,22 @@ set_default_state(drm_radeon_private_t *dev_priv)
 	OUT_RING(sq_stack_resource_mgmt_1);
 	OUT_RING(sq_stack_resource_mgmt_2);
 	ADVANCE_RING();
+*/
 }
 
-static GLboolean validate_buffers(struct r300_context *r300,
+static GLboolean validate_buffers(context_t *rmesa,
                                   struct radeon_bo *src_bo,
                                   struct radeon_bo *dst_bo)
 {
     int ret;
-    radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+    radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
                                       src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
 
-    radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+    radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
                                       dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
 
-    ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
-                                        first_elem(&r300->radeon.dma.reserved)->bo,
+    ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+                                        first_elem(&rmesa->radeon.dma.reserved)->bo,
                                         RADEON_GEM_DOMAIN_GTT, 0);
     if (ret)
         return GL_FALSE;
@@ -540,21 +676,29 @@ static GLboolean validate_buffers(struct r300_context *r300,
     return GL_TRUE;
 }
 
-GLboolean r600_blit(struct r300_context *r300,
+GLboolean r600_blit(context_t *context,
                     struct radeon_bo *src_bo,
                     intptr_t src_offset,
                     gl_format src_mesaformat,
                     unsigned src_pitch,
                     unsigned src_width,
                     unsigned src_height,
+                    unsigned src_x,
+                    unsigned src_y,
                     struct radeon_bo *dst_bo,
                     intptr_t dst_offset,
                     gl_format dst_mesaformat,
                     unsigned dst_pitch,
                     unsigned dst_width,
-                    unsigned dst_height)
+                    unsigned dst_height,
+                    unsigned dst_x,
+                    unsigned dst_y,
+                    unsigned w,
+                    unsigned h,
+                    unsigned flip_y)
 {
     int id = 0;
+    uint32_t cb_bpp;
 
     if (src_bo == dst_bo) {
         return GL_FALSE;
@@ -571,39 +715,46 @@ GLboolean r600_blit(struct r300_context *r300,
                 _mesa_get_format_name(dst_mesaformat));
     }
 
-    if (!validate_buffers(r300, src_bo, dst_bo))
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(context->radeon.glCtx);
+
+    if (!validate_buffers(context, src_bo, dst_bo))
         return GL_FALSE;
 
-    rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__);
+    rcommonEnsureCmdBufSpace(&context->radeon, 200, __FUNCTION__);
 
+    /* set clear state */
+    set_default_state(context);
     /* src */
-    set_tex_resource(dev_priv, tex_format,
-		     src_pitch / cpp,
-		     sy2, src_pitch / cpp,
-		     src_gpu_addr);
+    set_tex_resource(context, src_mesaformat, src_bo,
+		     src_width, src_height, src_pitch, src_offset);
 
-    r700SyncSurf(context, src_bo,
-		 RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
-		 0, TC_ACTION_ENA_bit);
+    set_tex_sampler(context);
 
     /* dst */
-    set_render_target(dev_priv, cb_format,
-		      dst_pitch / cpp, dy2,
-		      dst_gpu_addr);
+    cb_bpp = _mesa_get_format_bytes(dst_mesaformat);
+    set_render_target(context, dst_bo, dst_mesaformat,
+		      dst_pitch, cb_bpp, dst_width, dst_height, dst_offset);
+    /* shaders */ 
+    load_shaders(context->radeon.glCtx);
+
+    set_shaders(context);
 
     /* scissors */
-    set_scissors(dev_priv, 0, 0, width, height);
+    set_scissors(context, 0, 0, 8191, 8191);
 
+    set_vb_data(context, src_x, src_y, dst_x, dst_y, w, h, src_height, flip_y);
     /* Vertex buffer setup */
-    set_vtx_resource(dev_priv, vb_addr);
+    set_vtx_resource(context);
 
     /* draw */
-    draw_auto(dev_priv);
+    draw_auto(context);
 
-    r700SyncSurf(context, dst_bo, 0, RADEON_GEM_DOMAIN_VRAM,
+    r700SyncSurf(context, dst_bo, 0,
+                 RADEON_GEM_DOMAIN_VRAM|RADEON_GEM_DOMAIN_GTT,
 		 CB_ACTION_ENA_bit | (1 << (id + 6)));
 
-    radeonFlush(r300->radeon.glCtx);
+    radeonFlush(context->radeon.glCtx);
 
     return GL_TRUE;
 }
diff --git a/src/mesa/drivers/dri/r600/r600_blit.h b/src/mesa/drivers/dri/r600/r600_blit.h
new file mode 100644
index 0000000..f30c13c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit.h
@@ -0,0 +1,21 @@
+GLboolean r600_blit(context_t *context,
+                    struct radeon_bo *src_bo,
+                    intptr_t src_offset,
+                    gl_format src_mesaformat,
+                    unsigned src_pitch,
+                    unsigned src_width,
+                    unsigned src_height,
+                    unsigned src_x_offset,
+                    unsigned src_y_offset,
+                    struct radeon_bo *dst_bo,
+                    intptr_t dst_offset,
+                    gl_format dst_mesaformat,
+                    unsigned dst_pitch,
+                    unsigned dst_width,
+                    unsigned dst_height,
+                    unsigned dst_x_offset,
+                    unsigned dst_y_offset,
+                    unsigned w,
+                    unsigned h,
+                    unsigned flip_y);
+
diff --git a/src/mesa/drivers/dri/r600/r600_blit_shaders.h b/src/mesa/drivers/dri/r600/r600_blit_shaders.h
new file mode 100644
index 0000000..492dde9
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit_shaders.h
@@ -0,0 +1,28 @@
+const uint32_t r6xx_vs[] =
+{
+        0x00000004, // CF_DWORD0(ADDR(4))
+        0x81000000, // SQ_CF_INST_VTX COUNT(1)
+        0x0000203c, // CF_EXP_IMP CF_POS0 SQ_EXPORT_POS RW_GPR(0) ELEM_SIZE(0)
+        0x94000b08, // SQ_CF_INST_EXPORT_DONE SWZ XY01 BARRIER(1)
+        0x00004000, // CF_EXP_IMP 0 SQ_EXPORT_PARAM RW_GPR(0) ELEM_SIZE(0)
+        0x14200b1a, // SQ_CF_INST_EXPORT_DONE SWZ ZW01 EOP(1) BARRIER(0)
+        0x00000000,
+        0x00000000,
+        0x3c000000, // SQ_VTX_INST_FETCH BUFFER_ID(0) MEGA_FETCH_COUNT(16)
+        0x68cd1000, // DST_GPR(0) DST_SWZ: XYZW DATA_FORMAT(35) SQ_NUM_FORMAT_SCALED SQ_FORMAT_COMP_SIGNED
+        0x00080000, // ENDIAN_SWAP(SQ_ENDIAN_NONE) MEGA_FETCH(1)
+        0x00000000, // VTX_DWORD_PAD
+};
+
+const uint32_t r6xx_ps[] =
+{
+        0x00000002, // CF_DWORD0 AADR(2)
+        0x80800000, // SQ_CF_INST_TEX COUNT(1)
+        0x00000000, // CF_ALLOC_IMP_EXP0 SQ_EXPORT_PIXEL RW_GPR(0) ELEM_SIZE(0)
+        0x94200688, // SQ_CF_INST_EXPORT_DONE EOP(1) BARRIER(1) SWZ: XYZW
+        0x00000010, // SQ_TEX_INST_SAMPLE SRC_GPR(0) RESOURCE_ID(0)
+        0x000d1000, // DST_GPR(0) SWZ: XYZW TEX_UNNORMALIZED
+        0xb0800000, // SAMPLER_ID(0) SRC_SWZ XYZW
+        0x00000000, // TEX_DWORD_PAD
+};
+
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index cb54949..55680fa 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -384,6 +384,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
 	r700InitIoctlFuncs(&functions);
 	radeonInitBufferObjectFuncs(&functions);
 
+	if (r600->radeon.radeonScreen->kernel_mm) {
+                r600_init_texcopy_functions(&functions);
+        }
+	
 	if (!radeonInitContext(&r600->radeon, &functions,
 			       glVisual, driContextPriv,
 			       sharedContextPrivate)) {
diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h
index a1b4af7..774b30e 100644
--- a/src/mesa/drivers/dri/r600/r600_context.h
+++ b/src/mesa/drivers/dri/r600/r600_context.h
@@ -148,6 +148,8 @@ struct r600_context {
 	GLint      nNumActiveAos;
 	StreamDesc stream_desc[VERT_ATTRIB_MAX];
     struct r700_index_buffer ind_buf;
+	struct radeon_bo *blit_bo;
+	GLboolean blit_bo_loaded;
 };
 
 #define R700_CONTEXT(ctx)		((context_t *)(ctx->DriverCtx))
@@ -181,6 +183,7 @@ extern GLboolean r700SyncSurf(context_t *context,
 extern void r700Start3D(context_t *context);
 extern void r600InitAtoms(context_t *context);
 extern void r700InitDraw(GLcontext *ctx);
+extern void r600_init_texcopy_functions(struct dd_function_table *table);
 
 #define RADEON_D_CAPTURE 0
 #define RADEON_D_PLAYBACK 1
diff --git a/src/mesa/drivers/dri/r600/r600_texcopy.c b/src/mesa/drivers/dri/r600/r600_texcopy.c
new file mode 100644
index 0000000..287a82e
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_texcopy.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora at gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r600_context.h"
+
+#include "main/image.h"
+#include "main/teximage.h"
+#include "main/texstate.h"
+#include "drivers/common/meta.h"
+
+#include "radeon_mipmap_tree.h"
+#include "r600_blit.h"
+#include <main/debug.h>
+
+// TODO:
+// need to pass correct pitch for small dst textures!
+static GLboolean
+do_copy_texsubimage(GLcontext *ctx,
+                    GLenum target, GLint level,
+                    struct radeon_tex_obj *tobj,
+                    radeon_texture_image *timg,
+                    GLint dstx, GLint dsty,
+                    GLint x, GLint y,
+                    GLsizei width, GLsizei height)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    struct radeon_renderbuffer *rrb;
+
+    if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) {
+        rrb = radeon_get_depthbuffer(&context->radeon);
+    } else {
+        rrb = radeon_get_colorbuffer(&context->radeon);
+    }
+
+    if (!timg->mt) {
+        radeon_validate_texture_miptree(ctx, &tobj->base);
+    }
+
+    assert(rrb && rrb->bo);
+    assert(timg->mt->bo);
+    assert(timg->base.Width >= dstx + width);
+    assert(timg->base.Height >= dsty + height);
+
+    intptr_t src_offset = rrb->draw_offset;
+    intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level);
+
+    if (src_offset % 256 || dst_offset % 256) {
+        return GL_FALSE;
+    }
+
+    if (0) {
+        fprintf(stderr, "%s: copying to face %d, level %d\n",
+                __FUNCTION__, _mesa_tex_target_to_face(target), level);
+        fprintf(stderr, "to: x %d, y %d, offset %d\n", dstx, dsty, (uint32_t) dst_offset);
+        fprintf(stderr, "from (%dx%d) width %d, height %d, offset %d, pitch %d\n",
+                x, y, rrb->base.Width, rrb->base.Height, (uint32_t) src_offset, rrb->pitch/rrb->cpp);
+        fprintf(stderr, "src size %d, dst size %d\n", rrb->bo->size, timg->mt->bo->size);
+
+    }
+
+
+    /* blit from src buffer to texture */
+    return r600_blit(context, rrb->bo, src_offset, rrb->base.Format, rrb->pitch,
+                     rrb->base.Width, rrb->base.Height, x, y,
+                     timg->mt->bo, dst_offset, timg->base.TexFormat,
+                     timg->mt->levels[level].rowstride, timg->base.Width, timg->base.Height,
+                     dstx, dsty, width, height, 1);
+}
+
+static void
+r600CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+                   GLenum internalFormat,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   GLint border)
+{
+    struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
+    struct gl_texture_object *texObj =
+        _mesa_select_tex_object(ctx, texUnit, target);
+    struct gl_texture_image *texImage =
+        _mesa_select_tex_image(ctx, texObj, target, level);
+    int srcx, srcy, dstx, dsty;
+
+    if (border)
+        goto fail;
+
+    /* Setup or redefine the texture object, mipmap tree and texture
+     * image.  Don't populate yet.
+     */
+    ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                           width, height, border,
+                           GL_RGBA, GL_UNSIGNED_BYTE, NULL,
+                           &ctx->DefaultPacking, texObj, texImage);
+
+    srcx = x;
+    srcy = y;
+    dstx = 0;
+    dsty = 0;
+    if (!_mesa_clip_copytexsubimage(ctx,
+                                    &dstx, &dsty,
+                                    &srcx, &srcy,
+                                    &width, &height)) {
+        return;
+    }
+
+    if (!do_copy_texsubimage(ctx, target, level,
+                             radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
+                             0, 0, x, y, width, height)) {
+        goto fail;
+    }
+
+    return;
+
+fail:
+    _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
+                              width, height, border);
+}
+
+static void
+r600CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+                      GLint xoffset, GLint yoffset,
+                      GLint x, GLint y,
+                      GLsizei width, GLsizei height)
+{
+    struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
+    struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target);
+    struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+
+    if (!do_copy_texsubimage(ctx, target, level,
+                             radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
+                             xoffset, yoffset, x, y, width, height)) {
+
+       //DEBUG_FALLBACKS
+
+        _mesa_meta_CopyTexSubImage2D(ctx, target, level,
+                                     xoffset, yoffset, x, y, width, height);
+    }
+}
+
+
+void r600_init_texcopy_functions(struct dd_function_table *table)
+{
+    table->CopyTexImage2D = r600CopyTexImage2D;
+    table->CopyTexSubImage2D = r600CopyTexSubImage2D;
+}




More information about the mesa-commit mailing list