[Mesa-dev] [PATCH] r600g: add in-place DB decompression and texturing with DB tiling

Jerome Glisse j.glisse at gmail.com
Thu Oct 4 07:06:14 PDT 2012


On Wed, Oct 3, 2012 at 5:50 PM, Marek Olšák <maraeo at gmail.com> wrote:
> The decompression is done in-place and only the compressed tiles are
> decompressed. Note: R6xx-R7xx can do that only with Z16 and Z32F.
>
> The texture unit is programmed to use non-displayable tiling and depth
> ordering of samples, so that it can fetch the texture in the native DB format.
>
> The latest version of the libdrm surface allocator is required for stencil
> texturing to work. The old one didn't create the mipmap tree correctly.
> We need a separate mipmap tree for stencil, because the stencil mipmap
> offsets are not really depth offsets/4.
>
> The DB->CB copy is still used for transfers.
> ---
>
> I sent the libdrm patches a few minutes ago. I guess I will have to make another libdrm release.
>
> What's good about this is that it improves performance by 4-5% with the 1024x768 resolution in Lightsmark on Evergreen. However, the larger the resolution, the smaller the improvement is (something else becomes the bottleneck). It also reduces the memory requirements for depth textures by 50%, because the "flushed depth texture" isn't needed anymore.
>
> The catch is that fetching the 4th stencil mipmap level gives wrong pixels in one not-yet-committed test. What's weird is that all the other mipmaps (both smaller and larger) are fetched correctly. That bug has yet to be fixed, but who is using a stencil buffer with mipmaps anyway? :)

This 4th level might be the usual switching point between 2D tiled and 1D
tiled, i.e. we think the hw is still using 2D while it has switched to 1D
(or the other way around).

Otherwise reviewed

Cheers,
Jerome

>
>  src/gallium/auxiliary/util/u_blitter.c             |    3 +-
>  .../drivers/r600/evergreen_compute_internal.c      |    6 +-
>  src/gallium/drivers/r600/evergreen_state.c         |   92 +++++++++++---------
>  src/gallium/drivers/r600/evergreend.h              |   10 ++-
>  src/gallium/drivers/r600/r600_blit.c               |   89 ++++++++++++++++---
>  src/gallium/drivers/r600/r600_pipe.h               |    1 +
>  src/gallium/drivers/r600/r600_resource.h           |   10 ++-
>  src/gallium/drivers/r600/r600_state.c              |   13 +--
>  src/gallium/drivers/r600/r600_texture.c            |   60 ++++++++++++-
>  9 files changed, 216 insertions(+), 68 deletions(-)
>
> diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
> index 4ad7a6b..86109f0 100644
> --- a/src/gallium/auxiliary/util/u_blitter.c
> +++ b/src/gallium/auxiliary/util/u_blitter.c
> @@ -1602,7 +1602,8 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
>     blitter_disable_render_cond(ctx);
>
>     /* bind states */
> -   pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
> +   pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] :
> +                                         ctx->blend[0]);
>     pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
>     ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0, FALSE));
>     pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
> diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
> index 496d099..b937135 100644
> --- a/src/gallium/drivers/r600/evergreen_compute_internal.c
> +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
> @@ -480,7 +480,7 @@ void evergreen_set_tex_resource(
>
>         unsigned format, endian;
>         uint32_t word4 = 0, yuv_format = 0, pitch = 0;
> -       unsigned char swizzle[4], array_mode = 0, tile_type = 0;
> +       unsigned char swizzle[4], array_mode = 0, non_disp_tiling = 0;
>         unsigned height, depth;
>
>         swizzle[0] = 0;
> @@ -503,7 +503,7 @@ void evergreen_set_tex_resource(
>         pitch = align(tmp->surface.level[0].nblk_x *
>                 util_format_get_blockwidth(tmp->resource.b.b.format), 8);
>         array_mode = tmp->array_mode[0];
> -       tile_type = tmp->tile_type;
> +       non_disp_tiling = tmp->non_disp_tiling;
>
>         assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
>         assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);
> @@ -513,7 +513,7 @@ void evergreen_set_tex_resource(
>         evergreen_emit_raw_value(res,
>                                 (S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
>                                 S_030000_PITCH((pitch / 8) - 1) |
> -                               S_030000_NON_DISP_TILING_ORDER(tile_type) |
> +                               S_030000_NON_DISP_TILING_ORDER(non_disp_tiling) |
>                                 S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
>         evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
>                                 S_030004_TEX_DEPTH(depth - 1) |
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index c126e7d..5a14934 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -990,9 +990,11 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
>         struct r600_texture *tmp = (struct r600_texture*)texture;
>         unsigned format, endian;
>         uint32_t word4 = 0, yuv_format = 0, pitch = 0;
> -       unsigned char swizzle[4], array_mode = 0, tile_type = 0;
> +       unsigned char swizzle[4], array_mode = 0, non_disp_tiling = 0;
>         unsigned height, depth, width;
>         unsigned macro_aspect, tile_split, bankh, bankw, nbanks;
> +       enum pipe_format pipe_format = state->format;
> +       struct radeon_surface_level *surflevel;
>
>         if (view == NULL)
>                 return NULL;
> @@ -1010,7 +1012,27 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
>         swizzle[2] = state->swizzle_b;
>         swizzle[3] = state->swizzle_a;
>
> -       format = r600_translate_texformat(ctx->screen, state->format,
> +       tile_split = tmp->surface.tile_split;
> +       surflevel = tmp->surface.level;
> +
> +       /* Texturing with separate depth and stencil. */
> +       if (tmp->is_depth && !tmp->is_flushing_texture) {
> +               switch (pipe_format) {
> +               case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
> +                       pipe_format = PIPE_FORMAT_Z32_FLOAT;
> +                       break;
> +               case PIPE_FORMAT_X24S8_UINT:
> +               case PIPE_FORMAT_S8X24_UINT:
> +               case PIPE_FORMAT_X32_S8X24_UINT:
> +                       pipe_format = PIPE_FORMAT_S8_UINT;
> +                       tile_split = tmp->surface.stencil_tile_split;
> +                       surflevel = tmp->surface.stencil_level;
> +                       break;
> +               default:;
> +               }
> +       }
> +
> +       format = r600_translate_texformat(ctx->screen, pipe_format,
>                                           swizzle,
>                                           &word4, &yuv_format);
>         assert(format != ~0);
> @@ -1019,23 +1041,15 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
>                 return NULL;
>         }
>
> -       if (tmp->is_depth && !tmp->is_flushing_texture) {
> -               if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) {
> -                       FREE(view);
> -                       return NULL;
> -               }
> -               tmp = tmp->flushed_depth_texture;
> -       }
> -
>         endian = r600_colorformat_endian_swap(format);
>
>         width = width0;
>         height = height0;
> -       depth = tmp->surface.level[0].npix_z;
> -       pitch = tmp->surface.level[0].nblk_x * util_format_get_blockwidth(state->format);
> -       tile_type = tmp->tile_type;
> +       depth = surflevel[0].npix_z;
> +       pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
> +       non_disp_tiling = tmp->non_disp_tiling;
>
> -       switch (tmp->surface.level[0].mode) {
> +       switch (surflevel[0].mode) {
>         case RADEON_SURF_MODE_LINEAR_ALIGNED:
>                 array_mode = V_028C70_ARRAY_LINEAR_ALIGNED;
>                 break;
> @@ -1050,7 +1064,6 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
>                 array_mode = V_028C70_ARRAY_LINEAR_GENERAL;
>                 break;
>         }
> -       tile_split = tmp->surface.tile_split;
>         macro_aspect = tmp->surface.mtilea;
>         bankw = tmp->surface.bankw;
>         bankh = tmp->surface.bankh;
> @@ -1061,8 +1074,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
>
>         /* 128 bit formats require tile type = 1 */
>         if (rscreen->chip_class == CAYMAN) {
> -               if (util_format_get_blocksize(state->format) >= 16)
> -                       tile_type = 1;
> +               if (util_format_get_blocksize(pipe_format) >= 16)
> +                       non_disp_tiling = 1;
>         }
>         nbanks = eg_num_banks(rscreen->tiling_info.num_banks);
>
> @@ -1078,17 +1091,17 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
>                                        S_030000_PITCH((pitch / 8) - 1) |
>                                        S_030000_TEX_WIDTH(width - 1));
>         if (rscreen->chip_class == CAYMAN)
> -               view->tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(tile_type);
> +               view->tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
>         else
> -               view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(tile_type);
> +               view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
>         view->tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
>                                        S_030004_TEX_DEPTH(depth - 1) |
>                                        S_030004_ARRAY_MODE(array_mode));
> -       view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
> +       view->tex_resource_words[2] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
>         if (state->u.tex.last_level && texture->nr_samples <= 1) {
> -               view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
> +               view->tex_resource_words[3] = (surflevel[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
>         } else {
> -               view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
> +               view->tex_resource_words[3] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
>         }
>         view->tex_resource_words[4] = (word4 |
>                                        S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
> @@ -1114,7 +1127,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
>                                       S_03001C_BANK_WIDTH(bankw) |
>                                       S_03001C_BANK_HEIGHT(bankh) |
>                                       S_03001C_MACRO_TILE_ASPECT(macro_aspect) |
> -                                     S_03001C_NUM_BANKS(nbanks);
> +                                     S_03001C_NUM_BANKS(nbanks) |
> +                                     S_03001C_DEPTH_SAMPLE_ORDER(tmp->is_depth && !tmp->is_flushing_texture);
>         return &view->base;
>  }
>
> @@ -1252,17 +1266,11 @@ void evergreen_init_color_surface(struct r600_context *rctx,
>         unsigned color_info, color_attrib, color_dim = 0;
>         unsigned format, swap, ntype, endian;
>         uint64_t offset, base_offset;
> -       unsigned tile_type, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
> +       unsigned non_disp_tiling, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
>         const struct util_format_description *desc;
>         int i;
>         bool blend_clamp = 0, blend_bypass = 0;
>
> -       if (rtex->is_depth && !rtex->is_flushing_texture) {
> -               r600_init_flushed_depth_texture(&rctx->context, pipe_tex, NULL);
> -               rtex = rtex->flushed_depth_texture;
> -               assert(rtex);
> -       }
> -
>         offset = rtex->surface.level[level].offset;
>         if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
>                 offset += rtex->surface.level[level].slice_size *
> @@ -1277,20 +1285,20 @@ void evergreen_init_color_surface(struct r600_context *rctx,
>         switch (rtex->surface.level[level].mode) {
>         case RADEON_SURF_MODE_LINEAR_ALIGNED:
>                 color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED);
> -               tile_type = 1;
> +               non_disp_tiling = 1;
>                 break;
>         case RADEON_SURF_MODE_1D:
>                 color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_1D_TILED_THIN1);
> -               tile_type = rtex->tile_type;
> +               non_disp_tiling = rtex->non_disp_tiling;
>                 break;
>         case RADEON_SURF_MODE_2D:
>                 color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_2D_TILED_THIN1);
> -               tile_type = rtex->tile_type;
> +               non_disp_tiling = rtex->non_disp_tiling;
>                 break;
>         case RADEON_SURF_MODE_LINEAR:
>         default:
>                 color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_GENERAL);
> -               tile_type = 1;
> +               non_disp_tiling = 1;
>                 break;
>         }
>         tile_split = rtex->surface.tile_split;
> @@ -1307,7 +1315,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
>         /* 128 bit formats require tile type = 1 */
>         if (rscreen->chip_class == CAYMAN) {
>                 if (util_format_get_blocksize(surf->base.format) >= 16)
> -                       tile_type = 1;
> +                       non_disp_tiling = 1;
>         }
>         nbanks = eg_num_banks(rscreen->tiling_info.num_banks);
>         desc = util_format_description(surf->base.format);
> @@ -1322,7 +1330,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
>                         S_028C74_BANK_WIDTH(bankw) |
>                         S_028C74_BANK_HEIGHT(bankh) |
>                         S_028C74_MACRO_TILE_ASPECT(macro_aspect) |
> -                       S_028C74_NON_DISP_TILING_ORDER(tile_type) |
> +                       S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) |
>                         S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
>
>         if (rctx->chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) {
> @@ -1495,15 +1503,15 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
>         surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(slice);
>
>         if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
> -               uint64_t stencil_offset = rtex->surface.stencil_offset;
> +               uint64_t stencil_offset;
>                 unsigned stile_split = rtex->surface.stencil_tile_split;
>
>                 stile_split = eg_tile_split(stile_split);
> +
> +               stencil_offset = rtex->surface.stencil_level[level].offset;
>                 stencil_offset += r600_resource_va(screen, surf->base.texture);
> -               stencil_offset += rtex->surface.level[level].offset / 4;
> -               stencil_offset >>= 8;
>
> -               surf->db_stencil_base = stencil_offset;
> +               surf->db_stencil_base = stencil_offset >> 8;
>                 surf->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8) |
>                                         S_028044_TILE_SPLIT(stile_split);
>         } else {
> @@ -2047,6 +2055,10 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
>                                      S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
>                                      S_028000_COPY_CENTROID(1) |
>                                      S_028000_COPY_SAMPLE(a->copy_sample);
> +       } else if (a->flush_depthstencil_in_place) {
> +               db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) |
> +                                    S_028000_STENCIL_COMPRESS_DISABLE(1);
> +               db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
>         }
>
>         r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
> diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
> index d10ec7f..164ec01 100644
> --- a/src/gallium/drivers/r600/evergreend.h
> +++ b/src/gallium/drivers/r600/evergreend.h
> @@ -1108,9 +1108,13 @@
>  #define   C_030018_INTERLACED                          0xFFFFFFBF
>  #define   S_030018_TILE_SPLIT(x)                       (((x) & 0x7) << 29)
>  #define R_03001C_SQ_TEX_RESOURCE_WORD7_0             0x03001C
> +#define   S_03001C_DATA_FORMAT(x)                      (((x) & 0x3F) << 0)
> +#define   G_03001C_DATA_FORMAT(x)                      (((x) >> 0) & 0x3F)
> +#define   C_03001C_DATA_FORMAT                         0xFFFFFFC0
>  #define   S_03001C_MACRO_TILE_ASPECT(x)                (((x) & 0x3) << 6)
>  #define   S_03001C_BANK_WIDTH(x)                       (((x) & 0x3) << 8)
>  #define   S_03001C_BANK_HEIGHT(x)                      (((x) & 0x3) << 10)
> +#define   S_03001C_DEPTH_SAMPLE_ORDER(x)               (((x) & 0x1) << 15)
>  #define   S_03001C_NUM_BANKS(x)                        (((x) & 0x3) << 16)
>  #define   S_03001C_TYPE(x)                             (((x) & 0x3) << 30)
>  #define   G_03001C_TYPE(x)                             (((x) >> 30) & 0x3)
> @@ -1119,9 +1123,6 @@
>  #define     V_03001C_SQ_TEX_VTX_INVALID_BUFFER         0x00000001
>  #define     V_03001C_SQ_TEX_VTX_VALID_TEXTURE          0x00000002
>  #define     V_03001C_SQ_TEX_VTX_VALID_BUFFER           0x00000003
> -#define   S_03001C_DATA_FORMAT(x)                      (((x) & 0x3F) << 0)
> -#define   G_03001C_DATA_FORMAT(x)                      (((x) >> 0) & 0x3F)
> -#define   C_03001C_DATA_FORMAT                         0xFFFFFFC0
>
>  #define R_030008_SQ_VTX_CONSTANT_WORD2_0             0x030008
>  #define   S_030008_BASE_ADDRESS_HI(x)                  (((x) & 0xFF) << 0)
> @@ -1643,6 +1644,9 @@
>  #define   S_02800C_IGNORE_SC_ZRANGE(x)                 (((x) & 0x1) << 17)
>  #define   G_02800C_IGNORE_SC_ZRANGE(x)                 (((x) >> 17) & 0x1)
>  #define   C_02800C_IGNORE_SC_ZRANGE                    0xFFFDFFFF
> +#define   S_02800C_DISABLE_PIXEL_RATE_TILES(x)         (((x) & 0x1) << 26)
> +#define   G_02800C_DISABLE_PIXEL_RATE_TILES(x)         (((x) >> 26) & 0x1)
> +#define   C_02800C_DISABLE_PIXEL_RATE_TILES            0xFFFDFFFF
>  #define R_028010_DB_RENDER_OVERRIDE2                 0x00028010
>  #define R_028014_DB_HTILE_DATA_BASE                  0x00028014
>  #define R_028028_DB_STENCIL_CLEAR                    0x00028028
> diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
> index 9bbbc45..5c704af 100644
> --- a/src/gallium/drivers/r600/r600_blit.c
> +++ b/src/gallium/drivers/r600/r600_blit.c
> @@ -161,7 +161,6 @@ void r600_blit_decompress_depth(struct pipe_context *ctx,
>         rctx->db_misc_state.copy_sample = first_sample;
>         r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
>
> -
>         for (level = first_level; level <= last_level; level++) {
>                 if (!staging && !(texture->dirty_level_mask & (1 << level)))
>                         continue;
> @@ -220,6 +219,58 @@ void r600_blit_decompress_depth(struct pipe_context *ctx,
>         r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
>  }
>
> +static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
> +                                                struct r600_texture *texture,
> +                                                unsigned first_level, unsigned last_level,
> +                                                unsigned first_layer, unsigned last_layer)
> +{
> +       struct pipe_surface *zsurf, surf_tmpl = {{0}};
> +       unsigned layer, max_layer, checked_last_layer, level;
> +
> +       /* Enable decompression in DB_RENDER_CONTROL */
> +       rctx->db_misc_state.flush_depthstencil_in_place = true;
> +       r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
> +
> +       surf_tmpl.format = texture->resource.b.b.format;
> +       surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
> +
> +       for (level = first_level; level <= last_level; level++) {
> +               if (!(texture->dirty_level_mask & (1 << level)))
> +                       continue;
> +
> +               surf_tmpl.u.tex.level = level;
> +
> +               /* The smaller the mipmap level, the less layers there are
> +                * as far as 3D textures are concerned. */
> +               max_layer = u_max_layer(&texture->resource.b.b, level);
> +               checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
> +
> +               for (layer = first_layer; layer <= checked_last_layer; layer++) {
> +                       surf_tmpl.u.tex.first_layer = layer;
> +                       surf_tmpl.u.tex.last_layer = layer;
> +
> +                       zsurf = rctx->context.create_surface(&rctx->context, &texture->resource.b.b, &surf_tmpl);
> +
> +                       r600_blitter_begin(&rctx->context, R600_DECOMPRESS);
> +                       util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0,
> +                                                         rctx->custom_dsa_flush, 1.0f);
> +                       r600_blitter_end(&rctx->context);
> +
> +                       pipe_surface_reference(&zsurf, NULL);
> +               }
> +
> +               /* The texture will always be dirty if some layers or samples aren't flushed.
> +                * I don't think this case occurs often though. */
> +               if (first_layer == 0 && last_layer == max_layer) {
> +                       texture->dirty_level_mask &= ~(1 << level);
> +               }
> +       }
> +
> +       /* Disable decompression in DB_RENDER_CONTROL */
> +       rctx->db_misc_state.flush_depthstencil_in_place = false;
> +       r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
> +}
> +
>  void r600_decompress_depth_textures(struct r600_context *rctx,
>                                struct r600_samplerview_state *textures)
>  {
> @@ -238,10 +289,17 @@ void r600_decompress_depth_textures(struct r600_context *rctx,
>                 tex = (struct r600_texture *)view->texture;
>                 assert(tex->is_depth && !tex->is_flushing_texture);
>
> -               r600_blit_decompress_depth(&rctx->context, tex, NULL,
> -                                          view->u.tex.first_level, view->u.tex.last_level,
> -                                          0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level),
> -                                          0, u_max_sample(&tex->resource.b.b));
> +               if (rctx->chip_class >= EVERGREEN ||
> +                   r600_can_read_depth(tex)) {
> +                       r600_blit_decompress_depth_in_place(rctx, tex,
> +                                                  view->u.tex.first_level, view->u.tex.last_level,
> +                                                  0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level));
> +               } else {
> +                       r600_blit_decompress_depth(&rctx->context, tex, NULL,
> +                                                  view->u.tex.first_level, view->u.tex.last_level,
> +                                                  0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level),
> +                                                  0, u_max_sample(&tex->resource.b.b));
> +               }
>         }
>  }
>
> @@ -285,7 +343,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
>                         pipe_surface_reference(&cbsurf, NULL);
>                 }
>
> -               /* The texture will always be dirty if some layers or samples aren't flushed.
> +               /* The texture will always be dirty if some layers aren't flushed.
>                  * I don't think this case occurs often though. */
>                 if (first_layer == 0 && last_layer == max_layer) {
>                         rtex->dirty_level_mask &= ~(1 << level);
> @@ -337,13 +395,20 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
>         struct r600_texture *rtex = (struct r600_texture*)tex;
>
>         if (rtex->is_depth && !rtex->is_flushing_texture) {
> -               if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
> -                       return false; /* error */
> +               if (rctx->chip_class >= EVERGREEN ||
> +                   r600_can_read_depth(rtex)) {
> +                       r600_blit_decompress_depth_in_place(rctx, rtex,
> +                                                  level, level,
> +                                                  first_layer, last_layer);
> +               } else {
> +                       if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
> +                               return false; /* error */
>
> -               r600_blit_decompress_depth(ctx, rtex, NULL,
> -                                          level, level,
> -                                          first_layer, last_layer,
> -                                          0, u_max_sample(tex));
> +                       r600_blit_decompress_depth(ctx, rtex, NULL,
> +                                                  level, level,
> +                                                  first_layer, last_layer,
> +                                                  0, u_max_sample(tex));
> +               }
>         } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
>                 r600_blit_decompress_color(ctx, rtex, level, level,
>                                            first_layer, last_layer);
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 607116f..8a13e5c 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -69,6 +69,7 @@ struct r600_db_misc_state {
>         struct r600_atom atom;
>         bool occlusion_query_enabled;
>         bool flush_depthstencil_through_cb;
> +       bool flush_depthstencil_in_place;
>         bool copy_depth, copy_stencil;
>         unsigned copy_sample;
>         unsigned log_samples;
> diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
> index a5a5404..688ae53 100644
> --- a/src/gallium/drivers/r600/r600_resource.h
> +++ b/src/gallium/drivers/r600/r600_resource.h
> @@ -48,7 +48,7 @@ struct r600_texture {
>         unsigned                        array_mode[PIPE_MAX_TEXTURE_LEVELS];
>         unsigned                        pitch_override;
>         unsigned                        size;
> -       unsigned                        tile_type;
> +       bool                            non_disp_tiling;
>         bool                            is_depth;
>         bool                            is_rat;
>         unsigned                        dirty_level_mask; /* each bit says if that mipmap is compressed */
> @@ -114,6 +114,14 @@ struct r600_surface {
>         unsigned db_prefetch_limit;     /* R600 only */
>  };
>
> +/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
> +static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
> +{
> +       return rtex->resource.b.b.nr_samples <= 1 &&
> +              (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
> +               rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT);
> +}
> +
>  void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res);
>  void r600_init_screen_resource_functions(struct pipe_screen *screen);
>
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index 1d6171d..1bb0abb 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -1017,7 +1017,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
>         struct r600_texture *tmp = (struct r600_texture*)texture;
>         unsigned format, endian;
>         uint32_t word4 = 0, yuv_format = 0, pitch = 0;
> -       unsigned char swizzle[4], array_mode = 0, tile_type = 0;
> +       unsigned char swizzle[4], array_mode = 0;
>         unsigned width, height, depth, offset_level, last_level;
>
>         if (view == NULL)
> @@ -1045,7 +1045,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
>                 return NULL;
>         }
>
> -       if (tmp->is_depth && !tmp->is_flushing_texture) {
> +       if (tmp->is_depth && !tmp->is_flushing_texture && !r600_can_read_depth(tmp)) {
>                 if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) {
>                         FREE(view);
>                         return NULL;
> @@ -1061,7 +1061,6 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
>         height = height_first_level;
>         depth = tmp->surface.level[offset_level].npix_z;
>         pitch = tmp->surface.level[offset_level].nblk_x * util_format_get_blockwidth(state->format);
> -       tile_type = tmp->tile_type;
>
>         if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
>                 height = 1;
> @@ -1088,7 +1087,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
>         view->tex_resource = &tmp->resource;
>         view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
>                                        S_038000_TILE_MODE(array_mode) |
> -                                      S_038000_TILE_TYPE(tile_type) |
> +                                      S_038000_TILE_TYPE(tmp->non_disp_tiling) |
>                                        S_038000_PITCH((pitch / 8) - 1) |
>                                        S_038000_TEX_WIDTH(width - 1));
>         view->tex_resource_words[1] = (S_038004_TEX_HEIGHT(height - 1) |
> @@ -1214,7 +1213,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
>         int i;
>         bool blend_bypass = 0, blend_clamp = 1;
>
> -       if (rtex->is_depth && !rtex->is_flushing_texture) {
> +       if (rtex->is_depth && !rtex->is_flushing_texture && !r600_can_read_depth(rtex)) {
>                 r600_init_flushed_depth_texture(&rctx->context, surf->base.texture, NULL);
>                 rtex = rtex->flushed_depth_texture;
>                 assert(rtex);
> @@ -1858,6 +1857,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
>                                      S_028D0C_STENCIL_COPY_ENABLE(a->copy_stencil) |
>                                      S_028D0C_COPY_CENTROID(1) |
>                                      S_028D0C_COPY_SAMPLE(a->copy_sample);
> +       } else if (a->flush_depthstencil_in_place) {
> +               db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(1) |
> +                                    S_028D0C_STENCIL_COMPRESS_DISABLE(1);
> +               db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
>         }
>
>         r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
> diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
> index 4fb10ca..14aa02f 100644
> --- a/src/gallium/drivers/r600/r600_texture.c
> +++ b/src/gallium/drivers/r600/r600_texture.c
> @@ -151,7 +151,8 @@ static int r600_init_surface(struct r600_screen *rscreen,
>                 surface->flags |= RADEON_SURF_ZBUFFER;
>
>                 if (is_stencil) {
> -                       surface->flags |= RADEON_SURF_SBUFFER;
> +                       surface->flags |= RADEON_SURF_SBUFFER |
> +                                          RADEON_SURF_HAS_SBUFFER_MIPTREE;
>                 }
>         }
>         return 0;
> @@ -179,7 +180,8 @@ static int r600_setup_surface(struct pipe_screen *screen,
>                 rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
>                 rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
>                 if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
> -                       rtex->surface.stencil_offset = rtex->surface.level[0].slice_size;
> +                       rtex->surface.stencil_offset =
> +                       rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
>                 }
>         }
>         for (i = 0; i <= ptex->last_level; i++) {
> @@ -375,6 +377,8 @@ static void r600_texture_allocate_cmask(struct r600_screen *rscreen,
>  #endif
>  }
>
> +DEBUG_GET_ONCE_BOOL_OPTION(print_texdepth, "R600_PRINT_TEXDEPTH", FALSE);
> +
>  static struct r600_texture *
>  r600_texture_create_object(struct pipe_screen *screen,
>                            const struct pipe_resource *base,
> @@ -421,6 +425,9 @@ r600_texture_create_object(struct pipe_screen *screen,
>                 return NULL;
>         }
>
> +       /* Tiled depth textures utilize the non-displayable tile order. */
> +       rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
> +
>         /* Now create the backing buffer. */
>         if (!buf && alloc_bo) {
>                 unsigned base_align = rtex->surface.bo_alignment;
> @@ -442,6 +449,52 @@ r600_texture_create_object(struct pipe_screen *screen,
>                 memset(ptr + rtex->cmask_offset, 0xCC, rtex->cmask_size);
>                 rscreen->ws->buffer_unmap(resource->cs_buf);
>         }
> +
> +       if (debug_get_option_print_texdepth() && rtex->is_depth && rtex->non_disp_tiling) {
> +               printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
> +                      "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
> +                      "bpe=%u, nsamples=%u, flags=%u\n",
> +                      rtex->surface.npix_x, rtex->surface.npix_y,
> +                      rtex->surface.npix_z, rtex->surface.blk_w,
> +                      rtex->surface.blk_h, rtex->surface.blk_d,
> +                      rtex->surface.array_size, rtex->surface.last_level,
> +                      rtex->surface.bpe, rtex->surface.nsamples,
> +                      rtex->surface.flags);
> +               if (rtex->surface.flags & RADEON_SURF_ZBUFFER) {
> +                       for (int i = 0; i <= rtex->surface.last_level; i++) {
> +                               printf("  Z %i: offset=%llu, slice_size=%llu, npix_x=%u, "
> +                                      "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
> +                                      "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
> +                                      i, rtex->surface.level[i].offset,
> +                                      rtex->surface.level[i].slice_size,
> +                                      rtex->surface.level[i].npix_x,
> +                                      rtex->surface.level[i].npix_y,
> +                                      rtex->surface.level[i].npix_z,
> +                                      rtex->surface.level[i].nblk_x,
> +                                      rtex->surface.level[i].nblk_y,
> +                                      rtex->surface.level[i].nblk_z,
> +                                      rtex->surface.level[i].pitch_bytes,
> +                                      rtex->surface.level[i].mode);
> +                       }
> +               }
> +               if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
> +                       for (int i = 0; i <= rtex->surface.last_level; i++) {
> +                               printf("  S %i: offset=%llu, slice_size=%llu, npix_x=%u, "
> +                                      "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
> +                                      "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
> +                                      i, rtex->surface.stencil_level[i].offset,
> +                                      rtex->surface.stencil_level[i].slice_size,
> +                                      rtex->surface.stencil_level[i].npix_x,
> +                                      rtex->surface.stencil_level[i].npix_y,
> +                                      rtex->surface.stencil_level[i].npix_z,
> +                                      rtex->surface.stencil_level[i].nblk_x,
> +                                      rtex->surface.stencil_level[i].nblk_y,
> +                                      rtex->surface.stencil_level[i].nblk_z,
> +                                      rtex->surface.stencil_level[i].pitch_bytes,
> +                                      rtex->surface.stencil_level[i].mode);
> +                       }
> +               }
> +       }
>         return rtex;
>  }
>
> @@ -600,6 +653,7 @@ bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
>         }
>
>         (*flushed_depth_texture)->is_flushing_texture = TRUE;
> +       (*flushed_depth_texture)->non_disp_tiling = false;
>         return true;
>  }
>
> @@ -870,7 +924,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
>
>         /* Colorspace (return non-RGB formats directly). */
>         switch (desc->colorspace) {
> -               /* Depth stencil formats */
> +       /* Depth stencil formats */
>         case UTIL_FORMAT_COLORSPACE_ZS:
>                 switch (format) {
>                 case PIPE_FORMAT_Z16_UNORM:
> --
> 1.7.9.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list