Mesa (master): r600: add span support for 1D tiles

Alex Deucher agd5f at kemper.freedesktop.org
Mon Sep 14 22:11:44 UTC 2009


Module: Mesa
Branch: master
Commit: 364ca57aff733e8ee5f417b3f8719514f443315f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=364ca57aff733e8ee5f417b3f8719514f443315f

Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Sep 14 18:05:15 2009 -0400

r600: add span support for 1D tiles

1D tile span support for depth/stencil/color/textures

Z and stencil buffers are always tiled, so this fixes
software access to Z and stencil buffers.  Color buffers and
textures are currently linear, but this adds the span support
needed once 1D tiling is implemented for them.

This fixes the text in progs/demos/engine and progs/tests/z*

---

 src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h |    2 +
 src/mesa/drivers/dri/r600/r700_chip.c          |    2 +-
 src/mesa/drivers/dri/radeon/radeon_span.c      |  220 ++++++++++++++++++++++++
 3 files changed, 223 insertions(+), 1 deletions(-)

diff --git a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h
index 9d5aa3c..edd85b0 100644
--- a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h
+++ b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h
@@ -1366,6 +1366,7 @@ enum {
 	DB_DEPTH_INFO__READ_SIZE_bit                      = 1 << 3,
 	DB_DEPTH_INFO__ARRAY_MODE_mask                    = 0x0f << 15,
 	DB_DEPTH_INFO__ARRAY_MODE_shift                   = 15,
+	    ARRAY_1D_TILED_THIN1                          = 0x02,
 	    ARRAY_2D_TILED_THIN1                          = 0x04,
 	TILE_SURFACE_ENABLE_bit                           = 1 << 25,
 	TILE_COMPACT_bit                                  = 1 << 26,
@@ -1449,6 +1450,7 @@ enum {
 	CB_COLOR0_INFO__ARRAY_MODE_shift                  = 8,
 	    ARRAY_LINEAR_GENERAL                          = 0x00,
 	    ARRAY_LINEAR_ALIGNED                          = 0x01,
+/*	    ARRAY_1D_TILED_THIN1                          = 0x02, */
 /* 	    ARRAY_2D_TILED_THIN1                          = 0x04, */
 	NUMBER_TYPE_mask                                  = 0x07 << 12,
 	NUMBER_TYPE_shift                                 = 12,
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 1b56059..06d7e9c 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -351,7 +351,7 @@ static void r700SetDepthTarget(context_t *context)
         SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16,
                      DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
     }
-    SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1,
+    SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1,
              DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask);
     /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 4e100d8..aa20353 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -106,6 +106,142 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
 }
 #endif
 
+/* r600 tiling
+ * two main types:
+ * - 1D (akin to macro-linear/micro-tiled on older asics)
+ * - 2D (akin to macro-tiled/micro-tiled on older asics)
+ * only 1D tiling is implemented below
+ */
+#if defined(RADEON_COMMON_FOR_R600)
+static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
+				 GLint x, GLint y, GLint is_depth, GLint is_stencil) /* returns the byte offset of element (x, y) inside a 1D-tiled surface */
+{
+    GLint element_bytes = rrb->cpp; /* bytes per element, taken from rrb->cpp */
+    GLint num_samples = 1; /* fixed at 1: only a single-sample layout is computed */
+    GLint tile_width = 8; /* a tile covers 8 x 8 elements ... */
+    GLint tile_height = 8;
+    GLint tile_thickness = 1; /* ... and is one slice thick */
+    GLint pitch_elements = rrb->pitch / element_bytes; /* row pitch converted to elements */
+    GLint height = rrb->base.Height;
+    GLint z = 0; /* slice index; always 0 here, so slice_offset ends up 0 */
+    GLint sample_number = 0; /* always 0 here, so sample terms below contribute nothing */
+    /* values derived from the inputs above */
+    GLint tile_bytes;
+    GLint tiles_per_row;
+    GLint tiles_per_slice;
+    GLint slice_offset;
+    GLint tile_row_index;
+    GLint tile_column_index;
+    GLint tile_offset;
+    GLint pixel_number = 0; /* index (0..63) of the element within its tile, built bit by bit */
+    GLint element_offset;
+    GLint offset = 0;
+
+    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; /* bytes occupied by one whole tile */
+    tiles_per_row = pitch_elements /tile_width; /* integer division */
+    tiles_per_slice = tiles_per_row * (height / tile_height);
+    slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
+    tile_row_index = y / tile_height; /* which tile contains (x, y) */
+    tile_column_index = x / tile_width;
+    tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes; /* tiles are laid out row by row */
+
+    if (is_depth) { /* depth/stencil: the low 3 bits of x and y are interleaved */
+	    GLint pixel_offset = 0; /* stays 0 when element_bytes is neither 2 nor 4 */
+
+	    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+	    pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
+	    pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
+	    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+	    pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
+	    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+	    switch (element_bytes) {
+	    case 2:
+		    pixel_offset = pixel_number * element_bytes * num_samples;
+		    element_offset = pixel_offset + (sample_number * element_bytes); /* the same value is assigned again after the switch */
+		    break;
+	    case 4:
+		    /* stencil and depth data are stored separately within a tile.
+		     * stencil is stored in a contiguous tile before the depth tile.
+		     * stencil element is 1 byte, depth element is 3 bytes.
+		     * stencil tile is 64 bytes.
+		     */
+		    if (is_stencil)
+			    pixel_offset = pixel_number * 1 * num_samples; /* 1 byte per stencil element */
+		    else
+			    pixel_offset = (pixel_number * 3 * num_samples) + 64; /* 3 bytes per depth element, placed after the 64-byte stencil tile */
+		    break;
+	    }
+	    element_offset = pixel_offset + (sample_number * element_bytes); /* sample_number is 0, so this equals pixel_offset */
+    } else { /* colour/texture: the bit order of pixel_number depends on the element size */
+	    GLint sample_offset;
+
+	    switch (element_bytes) { /* any other element size leaves pixel_number at 0 */
+	    case 1:
+		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+		    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+		    pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
+		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+		    break;
+	    case 2:
+		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+		    pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
+		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+		    break;
+	    case 4:
+		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+		    pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
+		    pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
+		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+		    break;
+	    }
+	    sample_offset = sample_number * (tile_bytes / num_samples); /* 0 because sample_number is 0 */
+	    element_offset = sample_offset + (pixel_number * element_bytes);
+    }
+    offset = slice_offset + tile_offset + element_offset; /* slice base + tile base + position inside the tile */
+    return offset;
+}
+
+/* depth buffers: pointer to the depth data of element (x, y) within rrb->bo */
+static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
+			       GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr; /* base pointer of the buffer object */
+    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0); /* is_depth = 1, is_stencil = 0 */
+    return &ptr[offset];
+}
+
+static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb, /* pointer to the stencil data of element (x, y) within rrb->bo */
+				 GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr; /* base pointer of the buffer object */
+    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1); /* is_depth = 1, is_stencil = 1 */
+    return &ptr[offset];
+}
+
+static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, /* pointer to the colour element (x, y) within rrb->bo */
+			       GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr; /* base pointer of the buffer object */
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; /* either tiling flag */
+    GLint offset;
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) { /* has_surface set, or neither tiling flag set: address linearly */
+        offset = x * rrb->cpp + y * rrb->pitch; /* linear: cpp bytes per element, pitch bytes per row */
+    } else {
+	    offset = r600_1d_tile_helper(rrb, x, y, 0, 0); /* tiled: is_depth = 0, is_stencil = 0 */
+    }
+    return &ptr[offset];
+}
+
+#endif
+
 /* radeon tiling on r300-r500 has 4 states,
    macro-linear/micro-linear
    macro-linear/micro-tiled
@@ -270,7 +406,11 @@ s8z24_to_z24s8(uint32_t val)
 
 #define TAG(x)    radeon##x##_RGB565
 #define TAG2(x,y) radeon##x##_RGB565##y
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
 #include "spantmp2.h"
 
 /* 16 bit, ARGB1555 color spanline and pixel functions
@@ -280,7 +420,11 @@ s8z24_to_z24s8(uint32_t val)
 
 #define TAG(x)    radeon##x##_ARGB1555
 #define TAG2(x,y) radeon##x##_ARGB1555##y
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
 #include "spantmp2.h"
 
 /* 16 bit, RGBA4 color spanline and pixel functions
@@ -290,7 +434,11 @@ s8z24_to_z24s8(uint32_t val)
 
 #define TAG(x)    radeon##x##_ARGB4444
 #define TAG2(x,y) radeon##x##_ARGB4444##y
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
 #include "spantmp2.h"
 
 /* 32 bit, xRGB8888 color spanline and pixel functions
@@ -300,11 +448,19 @@ s8z24_to_z24s8(uint32_t val)
 
 #define TAG(x)    radeon##x##_xRGB8888
 #define TAG2(x,y) radeon##x##_xRGB8888##y
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
+#define PUT_VALUE(_x, _y, d) { \
+   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
+   *_ptr = d;								\
+} while (0)
+#else
 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
 #define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
    *_ptr = d;								\
 } while (0)
+#endif
 #include "spantmp2.h"
 
 /* 32 bit, ARGB8888 color spanline and pixel functions
@@ -314,11 +470,19 @@ s8z24_to_z24s8(uint32_t val)
 
 #define TAG(x)    radeon##x##_ARGB8888
 #define TAG2(x,y) radeon##x##_ARGB8888##y
+#if defined(RADEON_COMMON_FOR_R600)
+#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
+#define PUT_VALUE(_x, _y, d) { \
+   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
+   *_ptr = d;								\
+} while (0)
+#else
 #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
 #define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
    *_ptr = d;								\
 } while (0)
+#endif
 #include "spantmp2.h"
 
 /* ================================================================
@@ -342,6 +506,9 @@ s8z24_to_z24s8(uint32_t val)
 #if defined(RADEON_COMMON_FOR_R200)
 #define WRITE_DEPTH( _x, _y, d )					\
    *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
+#elif defined(RADEON_COMMON_FOR_R600)
+#define WRITE_DEPTH( _x, _y, d )					\
+   *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
 #else
 #define WRITE_DEPTH( _x, _y, d )					\
    *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
@@ -350,6 +517,9 @@ s8z24_to_z24s8(uint32_t val)
 #if defined(RADEON_COMMON_FOR_R200)
 #define READ_DEPTH( d, _x, _y )						\
    d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
 #else
 #define READ_DEPTH( d, _x, _y )						\
    d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
@@ -374,6 +544,15 @@ do {									\
    tmp |= ((d << 8) & 0xffffff00);					\
    *_ptr = tmp;					\
 } while (0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
+   tmp &= 0xff000000;							\
+   tmp |= ((d) & 0x00ffffff);					\
+   *_ptr = tmp;					\
+} while (0)
 #elif defined(RADEON_COMMON_FOR_R200)
 #define WRITE_DEPTH( _x, _y, d )					\
 do {									\
@@ -399,6 +578,11 @@ do {									\
   do {									\
     d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
   }while(0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define READ_DEPTH( d, _x, _y )						\
+  do {									\
+    d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
+  }while(0)
 #elif defined(RADEON_COMMON_FOR_R200)
 #define READ_DEPTH( d, _x, _y )						\
   do {									\
@@ -426,6 +610,20 @@ do {									\
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
    *_ptr = d;								\
 } while (0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
+   tmp &= 0xff000000;							\
+   tmp |= (((d) >> 8) & 0x00ffffff);					\
+   *_ptr = tmp;					\
+   _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
+   tmp = *_ptr;				\
+   tmp &= 0xffffff00;							\
+   tmp |= (d) & 0xff;							\
+   *_ptr = tmp;					\
+} while (0)
 #elif defined(RADEON_COMMON_FOR_R200)
 #define WRITE_DEPTH( _x, _y, d )					\
 do {									\
@@ -447,6 +645,12 @@ do {									\
   do { \
     d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)));	\
   }while(0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define READ_DEPTH( d, _x, _y )						\
+  do { \
+    d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
+    d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff;	\
+  }while(0)
 #elif defined(RADEON_COMMON_FOR_R200)
 #define READ_DEPTH( d, _x, _y )						\
   do { \
@@ -476,6 +680,15 @@ do {									\
    tmp |= (d) & 0xff;							\
    *_ptr = tmp;					\
 } while (0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define WRITE_STENCIL( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
+   GLuint tmp = *_ptr;				\
+   tmp &= 0xffffff00;							\
+   tmp |= (d) & 0xff;							\
+   *_ptr = tmp;					\
+} while (0)
 #elif defined(RADEON_COMMON_FOR_R200)
 #define WRITE_STENCIL( _x, _y, d )					\
 do {									\
@@ -503,6 +716,13 @@ do {									\
    GLuint tmp = *_ptr;				\
    d = tmp & 0x000000ff;						\
 } while (0)
+#elif defined(RADEON_COMMON_FOR_R600)
+#define READ_STENCIL( d, _x, _y )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
+   d = tmp & 0x000000ff;						\
+} while (0)
 #elif defined(RADEON_COMMON_FOR_R200)
 #define READ_STENCIL( d, _x, _y )					\
 do {									\




More information about the mesa-commit mailing list