[Mesa-dev] [PATCH] r600g: add in-place DB decompression and texturing with DB tiling
Marek Olšák
maraeo at gmail.com
Wed Oct 3 14:50:35 PDT 2012
The decompression is done in-place and only the compressed tiles are
decompressed. Note: R6xx-R7xx can do that only with Z16 and Z32F.
The texture unit is programmed to use non-displayable tiling and depth
ordering of samples, so that it can fetch the texture in the native DB format.
The latest version of the libdrm surface allocator is required for stencil
texturing to work. The old one didn't create the mipmap tree correctly.
We need a separate mipmap tree for stencil, because the stencil mipmap
offsets are not really depth offsets/4.
The DB->CB copy is still used for transfers.
---
I sent the libdrm patches a few minutes ago. I guess I will have to make another libdrm release.
What's good about this is that it improves performance by 4-5% with the 1024x768 resolution in Lightsmark on Evergreen. However, the larger the resolution, the smaller the improvement is (something else becomes the bottleneck). It also reduces the memory requirements for depth textures by 50%, because the "flushed depth texture" isn't needed anymore.
The catch is fetching the 4th stencil mipmap level gives wrong pixels in one not-yet-committed test. What's weird is that all the other mipmaps (both smaller and larger) are fetched correctly. That bug has yet to be fixed, but who is using a stencil buffer with mipmaps anyway? :)
src/gallium/auxiliary/util/u_blitter.c | 3 +-
.../drivers/r600/evergreen_compute_internal.c | 6 +-
src/gallium/drivers/r600/evergreen_state.c | 92 +++++++++++---------
src/gallium/drivers/r600/evergreend.h | 10 ++-
src/gallium/drivers/r600/r600_blit.c | 89 ++++++++++++++++---
src/gallium/drivers/r600/r600_pipe.h | 1 +
src/gallium/drivers/r600/r600_resource.h | 10 ++-
src/gallium/drivers/r600/r600_state.c | 13 +--
src/gallium/drivers/r600/r600_texture.c | 60 ++++++++++++-
9 files changed, 216 insertions(+), 68 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 4ad7a6b..86109f0 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1602,7 +1602,8 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
blitter_disable_render_cond(ctx);
/* bind states */
- pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
+ pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] :
+ ctx->blend[0]);
pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0, FALSE));
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 496d099..b937135 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -480,7 +480,7 @@ void evergreen_set_tex_resource(
unsigned format, endian;
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+ unsigned char swizzle[4], array_mode = 0, non_disp_tiling = 0;
unsigned height, depth;
swizzle[0] = 0;
@@ -503,7 +503,7 @@ void evergreen_set_tex_resource(
pitch = align(tmp->surface.level[0].nblk_x *
util_format_get_blockwidth(tmp->resource.b.b.format), 8);
array_mode = tmp->array_mode[0];
- tile_type = tmp->tile_type;
+ non_disp_tiling = tmp->non_disp_tiling;
assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);
@@ -513,7 +513,7 @@ void evergreen_set_tex_resource(
evergreen_emit_raw_value(res,
(S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
S_030000_PITCH((pitch / 8) - 1) |
- S_030000_NON_DISP_TILING_ORDER(tile_type) |
+ S_030000_NON_DISP_TILING_ORDER(non_disp_tiling) |
S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
S_030004_TEX_DEPTH(depth - 1) |
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index c126e7d..5a14934 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -990,9 +990,11 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
struct r600_texture *tmp = (struct r600_texture*)texture;
unsigned format, endian;
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+ unsigned char swizzle[4], array_mode = 0, non_disp_tiling = 0;
unsigned height, depth, width;
unsigned macro_aspect, tile_split, bankh, bankw, nbanks;
+ enum pipe_format pipe_format = state->format;
+ struct radeon_surface_level *surflevel;
if (view == NULL)
return NULL;
@@ -1010,7 +1012,27 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(ctx->screen, state->format,
+ tile_split = tmp->surface.tile_split;
+ surflevel = tmp->surface.level;
+
+ /* Texturing with separate depth and stencil. */
+ if (tmp->is_depth && !tmp->is_flushing_texture) {
+ switch (pipe_format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ pipe_format = PIPE_FORMAT_Z32_FLOAT;
+ break;
+ case PIPE_FORMAT_X24S8_UINT:
+ case PIPE_FORMAT_S8X24_UINT:
+ case PIPE_FORMAT_X32_S8X24_UINT:
+ pipe_format = PIPE_FORMAT_S8_UINT;
+ tile_split = tmp->surface.stencil_tile_split;
+ surflevel = tmp->surface.stencil_level;
+ break;
+ default:;
+ }
+ }
+
+ format = r600_translate_texformat(ctx->screen, pipe_format,
swizzle,
&word4, &yuv_format);
assert(format != ~0);
@@ -1019,23 +1041,15 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
return NULL;
}
- if (tmp->is_depth && !tmp->is_flushing_texture) {
- if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) {
- FREE(view);
- return NULL;
- }
- tmp = tmp->flushed_depth_texture;
- }
-
endian = r600_colorformat_endian_swap(format);
width = width0;
height = height0;
- depth = tmp->surface.level[0].npix_z;
- pitch = tmp->surface.level[0].nblk_x * util_format_get_blockwidth(state->format);
- tile_type = tmp->tile_type;
+ depth = surflevel[0].npix_z;
+ pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
+ non_disp_tiling = tmp->non_disp_tiling;
- switch (tmp->surface.level[0].mode) {
+ switch (surflevel[0].mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
array_mode = V_028C70_ARRAY_LINEAR_ALIGNED;
break;
@@ -1050,7 +1064,6 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
array_mode = V_028C70_ARRAY_LINEAR_GENERAL;
break;
}
- tile_split = tmp->surface.tile_split;
macro_aspect = tmp->surface.mtilea;
bankw = tmp->surface.bankw;
bankh = tmp->surface.bankh;
@@ -1061,8 +1074,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
/* 128 bit formats require tile type = 1 */
if (rscreen->chip_class == CAYMAN) {
- if (util_format_get_blocksize(state->format) >= 16)
- tile_type = 1;
+ if (util_format_get_blocksize(pipe_format) >= 16)
+ non_disp_tiling = 1;
}
nbanks = eg_num_banks(rscreen->tiling_info.num_banks);
@@ -1078,17 +1091,17 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
S_030000_PITCH((pitch / 8) - 1) |
S_030000_TEX_WIDTH(width - 1));
if (rscreen->chip_class == CAYMAN)
- view->tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(tile_type);
+ view->tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
else
- view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(tile_type);
+ view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
view->tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
S_030004_TEX_DEPTH(depth - 1) |
S_030004_ARRAY_MODE(array_mode));
- view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+ view->tex_resource_words[2] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
if (state->u.tex.last_level && texture->nr_samples <= 1) {
- view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+ view->tex_resource_words[3] = (surflevel[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
} else {
- view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+ view->tex_resource_words[3] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
}
view->tex_resource_words[4] = (word4 |
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
@@ -1114,7 +1127,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
S_03001C_BANK_WIDTH(bankw) |
S_03001C_BANK_HEIGHT(bankh) |
S_03001C_MACRO_TILE_ASPECT(macro_aspect) |
- S_03001C_NUM_BANKS(nbanks);
+ S_03001C_NUM_BANKS(nbanks) |
+ S_03001C_DEPTH_SAMPLE_ORDER(tmp->is_depth && !tmp->is_flushing_texture);
return &view->base;
}
@@ -1252,17 +1266,11 @@ void evergreen_init_color_surface(struct r600_context *rctx,
unsigned color_info, color_attrib, color_dim = 0;
unsigned format, swap, ntype, endian;
uint64_t offset, base_offset;
- unsigned tile_type, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
+ unsigned non_disp_tiling, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
const struct util_format_description *desc;
int i;
bool blend_clamp = 0, blend_bypass = 0;
- if (rtex->is_depth && !rtex->is_flushing_texture) {
- r600_init_flushed_depth_texture(&rctx->context, pipe_tex, NULL);
- rtex = rtex->flushed_depth_texture;
- assert(rtex);
- }
-
offset = rtex->surface.level[level].offset;
if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
offset += rtex->surface.level[level].slice_size *
@@ -1277,20 +1285,20 @@ void evergreen_init_color_surface(struct r600_context *rctx,
switch (rtex->surface.level[level].mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED);
- tile_type = 1;
+ non_disp_tiling = 1;
break;
case RADEON_SURF_MODE_1D:
color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_1D_TILED_THIN1);
- tile_type = rtex->tile_type;
+ non_disp_tiling = rtex->non_disp_tiling;
break;
case RADEON_SURF_MODE_2D:
color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_2D_TILED_THIN1);
- tile_type = rtex->tile_type;
+ non_disp_tiling = rtex->non_disp_tiling;
break;
case RADEON_SURF_MODE_LINEAR:
default:
color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_GENERAL);
- tile_type = 1;
+ non_disp_tiling = 1;
break;
}
tile_split = rtex->surface.tile_split;
@@ -1307,7 +1315,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
/* 128 bit formats require tile type = 1 */
if (rscreen->chip_class == CAYMAN) {
if (util_format_get_blocksize(surf->base.format) >= 16)
- tile_type = 1;
+ non_disp_tiling = 1;
}
nbanks = eg_num_banks(rscreen->tiling_info.num_banks);
desc = util_format_description(surf->base.format);
@@ -1322,7 +1330,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
S_028C74_BANK_WIDTH(bankw) |
S_028C74_BANK_HEIGHT(bankh) |
S_028C74_MACRO_TILE_ASPECT(macro_aspect) |
- S_028C74_NON_DISP_TILING_ORDER(tile_type) |
+ S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) |
S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
if (rctx->chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) {
@@ -1495,15 +1503,15 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(slice);
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
- uint64_t stencil_offset = rtex->surface.stencil_offset;
+ uint64_t stencil_offset;
unsigned stile_split = rtex->surface.stencil_tile_split;
stile_split = eg_tile_split(stile_split);
+
+ stencil_offset = rtex->surface.stencil_level[level].offset;
stencil_offset += r600_resource_va(screen, surf->base.texture);
- stencil_offset += rtex->surface.level[level].offset / 4;
- stencil_offset >>= 8;
- surf->db_stencil_base = stencil_offset;
+ surf->db_stencil_base = stencil_offset >> 8;
surf->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8) |
S_028044_TILE_SPLIT(stile_split);
} else {
@@ -2047,6 +2055,10 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
S_028000_COPY_CENTROID(1) |
S_028000_COPY_SAMPLE(a->copy_sample);
+ } else if (a->flush_depthstencil_in_place) {
+ db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) |
+ S_028000_STENCIL_COMPRESS_DISABLE(1);
+ db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
}
r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index d10ec7f..164ec01 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1108,9 +1108,13 @@
#define C_030018_INTERLACED 0xFFFFFFBF
#define S_030018_TILE_SPLIT(x) (((x) & 0x7) << 29)
#define R_03001C_SQ_TEX_RESOURCE_WORD7_0 0x03001C
+#define S_03001C_DATA_FORMAT(x) (((x) & 0x3F) << 0)
+#define G_03001C_DATA_FORMAT(x) (((x) >> 0) & 0x3F)
+#define C_03001C_DATA_FORMAT 0xFFFFFFC0
#define S_03001C_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 6)
#define S_03001C_BANK_WIDTH(x) (((x) & 0x3) << 8)
#define S_03001C_BANK_HEIGHT(x) (((x) & 0x3) << 10)
+#define S_03001C_DEPTH_SAMPLE_ORDER(x) (((x) & 0x1) << 15)
#define S_03001C_NUM_BANKS(x) (((x) & 0x3) << 16)
#define S_03001C_TYPE(x) (((x) & 0x3) << 30)
#define G_03001C_TYPE(x) (((x) >> 30) & 0x3)
@@ -1119,9 +1123,6 @@
#define V_03001C_SQ_TEX_VTX_INVALID_BUFFER 0x00000001
#define V_03001C_SQ_TEX_VTX_VALID_TEXTURE 0x00000002
#define V_03001C_SQ_TEX_VTX_VALID_BUFFER 0x00000003
-#define S_03001C_DATA_FORMAT(x) (((x) & 0x3F) << 0)
-#define G_03001C_DATA_FORMAT(x) (((x) >> 0) & 0x3F)
-#define C_03001C_DATA_FORMAT 0xFFFFFFC0
#define R_030008_SQ_VTX_CONSTANT_WORD2_0 0x030008
#define S_030008_BASE_ADDRESS_HI(x) (((x) & 0xFF) << 0)
@@ -1643,6 +1644,9 @@
#define S_02800C_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17)
#define G_02800C_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1)
#define C_02800C_IGNORE_SC_ZRANGE 0xFFFDFFFF
+#define S_02800C_DISABLE_PIXEL_RATE_TILES(x) (((x) & 0x1) << 26)
+#define G_02800C_DISABLE_PIXEL_RATE_TILES(x) (((x) >> 26) & 0x1)
+#define C_02800C_DISABLE_PIXEL_RATE_TILES 0xFFFDFFFF
#define R_028010_DB_RENDER_OVERRIDE2 0x00028010
#define R_028014_DB_HTILE_DATA_BASE 0x00028014
#define R_028028_DB_STENCIL_CLEAR 0x00028028
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 9bbbc45..5c704af 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -161,7 +161,6 @@ void r600_blit_decompress_depth(struct pipe_context *ctx,
rctx->db_misc_state.copy_sample = first_sample;
r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
-
for (level = first_level; level <= last_level; level++) {
if (!staging && !(texture->dirty_level_mask & (1 << level)))
continue;
@@ -220,6 +219,58 @@ void r600_blit_decompress_depth(struct pipe_context *ctx,
r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
+static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
+ struct r600_texture *texture,
+ unsigned first_level, unsigned last_level,
+ unsigned first_layer, unsigned last_layer)
+{
+ struct pipe_surface *zsurf, surf_tmpl = {{0}};
+ unsigned layer, max_layer, checked_last_layer, level;
+
+ /* Enable decompression in DB_RENDER_CONTROL */
+ rctx->db_misc_state.flush_depthstencil_in_place = true;
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+
+ surf_tmpl.format = texture->resource.b.b.format;
+ surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
+
+ for (level = first_level; level <= last_level; level++) {
+ if (!(texture->dirty_level_mask & (1 << level)))
+ continue;
+
+ surf_tmpl.u.tex.level = level;
+
+ /* The smaller the mipmap level, the less layers there are
+ * as far as 3D textures are concerned. */
+ max_layer = u_max_layer(&texture->resource.b.b, level);
+ checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
+
+ for (layer = first_layer; layer <= checked_last_layer; layer++) {
+ surf_tmpl.u.tex.first_layer = layer;
+ surf_tmpl.u.tex.last_layer = layer;
+
+ zsurf = rctx->context.create_surface(&rctx->context, &texture->resource.b.b, &surf_tmpl);
+
+ r600_blitter_begin(&rctx->context, R600_DECOMPRESS);
+ util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0,
+ rctx->custom_dsa_flush, 1.0f);
+ r600_blitter_end(&rctx->context);
+
+ pipe_surface_reference(&zsurf, NULL);
+ }
+
+ /* The texture will always be dirty if some layers or samples aren't flushed.
+ * I don't think this case occurs often though. */
+ if (first_layer == 0 && last_layer == max_layer) {
+ texture->dirty_level_mask &= ~(1 << level);
+ }
+ }
+
+ /* Disable decompression in DB_RENDER_CONTROL */
+ rctx->db_misc_state.flush_depthstencil_in_place = false;
+ r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+}
+
void r600_decompress_depth_textures(struct r600_context *rctx,
struct r600_samplerview_state *textures)
{
@@ -238,10 +289,17 @@ void r600_decompress_depth_textures(struct r600_context *rctx,
tex = (struct r600_texture *)view->texture;
assert(tex->is_depth && !tex->is_flushing_texture);
- r600_blit_decompress_depth(&rctx->context, tex, NULL,
- view->u.tex.first_level, view->u.tex.last_level,
- 0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level),
- 0, u_max_sample(&tex->resource.b.b));
+ if (rctx->chip_class >= EVERGREEN ||
+ r600_can_read_depth(tex)) {
+ r600_blit_decompress_depth_in_place(rctx, tex,
+ view->u.tex.first_level, view->u.tex.last_level,
+ 0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level));
+ } else {
+ r600_blit_decompress_depth(&rctx->context, tex, NULL,
+ view->u.tex.first_level, view->u.tex.last_level,
+ 0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level),
+ 0, u_max_sample(&tex->resource.b.b));
+ }
}
}
@@ -285,7 +343,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
pipe_surface_reference(&cbsurf, NULL);
}
- /* The texture will always be dirty if some layers or samples aren't flushed.
+ /* The texture will always be dirty if some layers aren't flushed.
* I don't think this case occurs often though. */
if (first_layer == 0 && last_layer == max_layer) {
rtex->dirty_level_mask &= ~(1 << level);
@@ -337,13 +395,20 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
struct r600_texture *rtex = (struct r600_texture*)tex;
if (rtex->is_depth && !rtex->is_flushing_texture) {
- if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
- return false; /* error */
+ if (rctx->chip_class >= EVERGREEN ||
+ r600_can_read_depth(rtex)) {
+ r600_blit_decompress_depth_in_place(rctx, rtex,
+ level, level,
+ first_layer, last_layer);
+ } else {
+ if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
+ return false; /* error */
- r600_blit_decompress_depth(ctx, rtex, NULL,
- level, level,
- first_layer, last_layer,
- 0, u_max_sample(tex));
+ r600_blit_decompress_depth(ctx, rtex, NULL,
+ level, level,
+ first_layer, last_layer,
+ 0, u_max_sample(tex));
+ }
} else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
r600_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 607116f..8a13e5c 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -69,6 +69,7 @@ struct r600_db_misc_state {
struct r600_atom atom;
bool occlusion_query_enabled;
bool flush_depthstencil_through_cb;
+ bool flush_depthstencil_in_place;
bool copy_depth, copy_stencil;
unsigned copy_sample;
unsigned log_samples;
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index a5a5404..688ae53 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -48,7 +48,7 @@ struct r600_texture {
unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS];
unsigned pitch_override;
unsigned size;
- unsigned tile_type;
+ bool non_disp_tiling;
bool is_depth;
bool is_rat;
unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
@@ -114,6 +114,14 @@ struct r600_surface {
unsigned db_prefetch_limit; /* R600 only */
};
+/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
+static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+{
+ return rtex->resource.b.b.nr_samples <= 1 &&
+ (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
+ rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT);
+}
+
void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res);
void r600_init_screen_resource_functions(struct pipe_screen *screen);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 1d6171d..1bb0abb 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1017,7 +1017,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
struct r600_texture *tmp = (struct r600_texture*)texture;
unsigned format, endian;
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+ unsigned char swizzle[4], array_mode = 0;
unsigned width, height, depth, offset_level, last_level;
if (view == NULL)
@@ -1045,7 +1045,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
return NULL;
}
- if (tmp->is_depth && !tmp->is_flushing_texture) {
+ if (tmp->is_depth && !tmp->is_flushing_texture && !r600_can_read_depth(tmp)) {
if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) {
FREE(view);
return NULL;
@@ -1061,7 +1061,6 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
height = height_first_level;
depth = tmp->surface.level[offset_level].npix_z;
pitch = tmp->surface.level[offset_level].nblk_x * util_format_get_blockwidth(state->format);
- tile_type = tmp->tile_type;
if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
@@ -1088,7 +1087,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
view->tex_resource = &tmp->resource;
view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
S_038000_TILE_MODE(array_mode) |
- S_038000_TILE_TYPE(tile_type) |
+ S_038000_TILE_TYPE(tmp->non_disp_tiling) |
S_038000_PITCH((pitch / 8) - 1) |
S_038000_TEX_WIDTH(width - 1));
view->tex_resource_words[1] = (S_038004_TEX_HEIGHT(height - 1) |
@@ -1214,7 +1213,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
int i;
bool blend_bypass = 0, blend_clamp = 1;
- if (rtex->is_depth && !rtex->is_flushing_texture) {
+ if (rtex->is_depth && !rtex->is_flushing_texture && !r600_can_read_depth(rtex)) {
r600_init_flushed_depth_texture(&rctx->context, surf->base.texture, NULL);
rtex = rtex->flushed_depth_texture;
assert(rtex);
@@ -1858,6 +1857,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
S_028D0C_STENCIL_COPY_ENABLE(a->copy_stencil) |
S_028D0C_COPY_CENTROID(1) |
S_028D0C_COPY_SAMPLE(a->copy_sample);
+ } else if (a->flush_depthstencil_in_place) {
+ db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(1) |
+ S_028D0C_STENCIL_COMPRESS_DISABLE(1);
+ db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
}
r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 4fb10ca..14aa02f 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -151,7 +151,8 @@ static int r600_init_surface(struct r600_screen *rscreen,
surface->flags |= RADEON_SURF_ZBUFFER;
if (is_stencil) {
- surface->flags |= RADEON_SURF_SBUFFER;
+ surface->flags |= RADEON_SURF_SBUFFER |
+ RADEON_SURF_HAS_SBUFFER_MIPTREE;
}
}
return 0;
@@ -179,7 +180,8 @@ static int r600_setup_surface(struct pipe_screen *screen,
rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
- rtex->surface.stencil_offset = rtex->surface.level[0].slice_size;
+ rtex->surface.stencil_offset =
+ rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
}
}
for (i = 0; i <= ptex->last_level; i++) {
@@ -375,6 +377,8 @@ static void r600_texture_allocate_cmask(struct r600_screen *rscreen,
#endif
}
+DEBUG_GET_ONCE_BOOL_OPTION(print_texdepth, "R600_PRINT_TEXDEPTH", FALSE);
+
static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
@@ -421,6 +425,9 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
+ /* Tiled depth textures utilize the non-displayable tile order. */
+ rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
+
/* Now create the backing buffer. */
if (!buf && alloc_bo) {
unsigned base_align = rtex->surface.bo_alignment;
@@ -442,6 +449,52 @@ r600_texture_create_object(struct pipe_screen *screen,
memset(ptr + rtex->cmask_offset, 0xCC, rtex->cmask_size);
rscreen->ws->buffer_unmap(resource->cs_buf);
}
+
+ if (debug_get_option_print_texdepth() && rtex->is_depth && rtex->non_disp_tiling) {
+ printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
+ "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
+ "bpe=%u, nsamples=%u, flags=%u\n",
+ rtex->surface.npix_x, rtex->surface.npix_y,
+ rtex->surface.npix_z, rtex->surface.blk_w,
+ rtex->surface.blk_h, rtex->surface.blk_d,
+ rtex->surface.array_size, rtex->surface.last_level,
+ rtex->surface.bpe, rtex->surface.nsamples,
+ rtex->surface.flags);
+ if (rtex->surface.flags & RADEON_SURF_ZBUFFER) {
+ for (int i = 0; i <= rtex->surface.last_level; i++) {
+ printf(" Z %i: offset=%llu, slice_size=%llu, npix_x=%u, "
+ "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+ "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
+ i, rtex->surface.level[i].offset,
+ rtex->surface.level[i].slice_size,
+ rtex->surface.level[i].npix_x,
+ rtex->surface.level[i].npix_y,
+ rtex->surface.level[i].npix_z,
+ rtex->surface.level[i].nblk_x,
+ rtex->surface.level[i].nblk_y,
+ rtex->surface.level[i].nblk_z,
+ rtex->surface.level[i].pitch_bytes,
+ rtex->surface.level[i].mode);
+ }
+ }
+ if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+ for (int i = 0; i <= rtex->surface.last_level; i++) {
+ printf(" S %i: offset=%llu, slice_size=%llu, npix_x=%u, "
+ "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+ "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
+ i, rtex->surface.stencil_level[i].offset,
+ rtex->surface.stencil_level[i].slice_size,
+ rtex->surface.stencil_level[i].npix_x,
+ rtex->surface.stencil_level[i].npix_y,
+ rtex->surface.stencil_level[i].npix_z,
+ rtex->surface.stencil_level[i].nblk_x,
+ rtex->surface.stencil_level[i].nblk_y,
+ rtex->surface.stencil_level[i].nblk_z,
+ rtex->surface.stencil_level[i].pitch_bytes,
+ rtex->surface.stencil_level[i].mode);
+ }
+ }
+ }
return rtex;
}
@@ -600,6 +653,7 @@ bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
}
(*flushed_depth_texture)->is_flushing_texture = TRUE;
+ (*flushed_depth_texture)->non_disp_tiling = false;
return true;
}
@@ -870,7 +924,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
/* Colorspace (return non-RGB formats directly). */
switch (desc->colorspace) {
- /* Depth stencil formats */
+ /* Depth stencil formats */
case UTIL_FORMAT_COLORSPACE_ZS:
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
--
1.7.9.5
More information about the mesa-dev
mailing list