[Mesa-dev] [PATCH 11/14] swr: rework resource layout and surface setup

Cherniak, Bruce bruce.cherniak at intel.com
Tue Nov 15 01:46:49 UTC 2016


This one is going to take a bit more testing and internal review before adopting.

> On Nov 12, 2016, at 5:00 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> 
> This is a bit of a mega-commit, but unfortunately there's no great way
> to break this up since a lot of different pieces have to match up. Here
> we do the following:
> - change surface layout to match swr's Load/StoreTile expectations
> - fix sampler settings to respect all sampler view parameters
> - fix stencil sampling to read from secondary resource
> - respect pipe surface format, level, and layer settings
> - fix resource map/unmap based on the new layout logic
> - fix resource map/unmap to copy proper parts of stencil values in and
>   out of the matching depth texture
> 
> These fix a massive quantity of piglits, including all the
> tex-miplevel-selection ones.
> 
> Note that the swr native miptree layout isn't extremely space-efficient,
> and we end up using it for all textures, not just the renderable ones. A
> back-of-the-envelope calculation suggests about 10%-25% increased memory
> usage for miptrees, depending on the number of LODs. Single-LOD textures
> should be unaffected.
> 
> There are a handful of regressions as a result of this change:
> - fbo-generatemipmap-formats on compressed textures with irregular
>   sizes fails. The 2+ levels appear as if their offsets were off by a
>   bit. No idea why, despite a lot of staring. I suspect the fact that
>   this test was passing before is pure coincidence as well.
> - Some textureGrad tests; these failures match llvmpipe. (There are
>   debug settings allowing improved gallivm sampling accuracy.)
> - Some layered clearing tests as swr doesn't currently support that. It
>   was getting lucky before because enough other things were broken.
> 
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
> src/gallium/drivers/swr/swr_context.cpp | 103 ++++++++++++-----
> src/gallium/drivers/swr/swr_draw.cpp    |   4 +-
> src/gallium/drivers/swr/swr_resource.h  |   8 +-
> src/gallium/drivers/swr/swr_screen.cpp  | 188 +++++++++++++++++++++-----------
> src/gallium/drivers/swr/swr_shader.cpp  |  28 ++++-
> src/gallium/drivers/swr/swr_state.cpp   | 166 +++++++++++++++++-----------
> 6 files changed, 337 insertions(+), 160 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
> index 6bc6de4..fc8e74a 100644
> --- a/src/gallium/drivers/swr/swr_context.cpp
> +++ b/src/gallium/drivers/swr/swr_context.cpp
> @@ -139,21 +139,35 @@ swr_transfer_map(struct pipe_context *pipe,
>    if (!pt)
>       return NULL;
>    pipe_resource_reference(&pt->resource, resource);
> +   pt->usage = (pipe_transfer_usage)usage;
>    pt->level = level;
>    pt->box = *box;
> -   pt->stride = spr->row_stride[level];
> -   pt->layer_stride = spr->img_stride[level];
> -
> -   /* if we're mapping the depth/stencil, copy in stencil */
> -   if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT
> -       && spr->has_stencil) {
> -      for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) {
> -         spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i];
> -      }
> -   } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
> -              && spr->has_stencil) {
> -      for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) {
> -         spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i];
> +   pt->stride = spr->swr.pitch;
> +   pt->layer_stride = spr->swr.qpitch * spr->swr.pitch;
> +
> +   /* if we're mapping the depth/stencil, copy in stencil for the section
> +    * being read in
> +    */
> +   if (usage & PIPE_TRANSFER_READ && spr->has_depth && spr->has_stencil) {
> +      size_t zbase, sbase;
> +      for (int z = box->z; z < box->z + box->depth; z++) {
> +         zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch +
> +            spr->mip_offsets[level];
> +         sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch +
> +            spr->secondary_mip_offsets[level];
> +         for (int y = box->y; y < box->y + box->height; y++) {
> +            if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
> +               for (int x = box->x; x < box->x + box->width; x++)
> +                  spr->swr.pBaseAddress[zbase + 4 * x + 3] =
> +                     spr->secondary.pBaseAddress[sbase + x];
> +            } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
> +               for (int x = box->x; x < box->x + box->width; x++)
> +                  spr->swr.pBaseAddress[zbase + 8 * x + 4] =
> +                     spr->secondary.pBaseAddress[sbase + x];
> +            }
> +            zbase += spr->swr.pitch;
> +            sbase += spr->secondary.pitch;
> +         }
>       }
>    }
> 
> @@ -167,23 +181,60 @@ swr_transfer_map(struct pipe_context *pipe,
> }
> 
> static void
> -swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)
> +swr_transfer_flush_region(struct pipe_context *pipe,
> +                          struct pipe_transfer *transfer,
> +                          const struct pipe_box *flush_box)
> {
>    assert(transfer->resource);
> +   assert(transfer->usage & PIPE_TRANSFER_WRITE);
> 
> -   struct swr_resource *res = swr_resource(transfer->resource);
> -   /* if we're mapping the depth/stencil, copy out stencil */
> -   if (res->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT
> -       && res->has_stencil) {
> -      for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) {
> -         res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[4 * i + 3];
> -      }
> -   } else if (res->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
> -              && res->has_stencil) {
> -      for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) {
> -         res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[8 * i + 4];
> +   struct swr_resource *spr = swr_resource(transfer->resource);
> +   if (!spr->has_depth || !spr->has_stencil)
> +      return;
> +
> +   size_t zbase, sbase;
> +   struct pipe_box box = *flush_box;
> +   box.x += transfer->box.x;
> +   box.y += transfer->box.y;
> +   box.z += transfer->box.z;
> +   for (int z = box.z; z < box.z + box.depth; z++) {
> +      zbase = (z * spr->swr.qpitch + box.y) * spr->swr.pitch +
> +         spr->mip_offsets[transfer->level];
> +      sbase = (z * spr->secondary.qpitch + box.y) * spr->secondary.pitch +
> +         spr->secondary_mip_offsets[transfer->level];
> +      for (int y = box.y; y < box.y + box.height; y++) {
> +         if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
> +            for (int x = box.x; x < box.x + box.width; x++)
> +               spr->secondary.pBaseAddress[sbase + x] =
> +                  spr->swr.pBaseAddress[zbase + 4 * x + 3];
> +         } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
> +            for (int x = box.x; x < box.x + box.width; x++)
> +               spr->secondary.pBaseAddress[sbase + x] =
> +                  spr->swr.pBaseAddress[zbase + 8 * x + 4];
> +         }
> +         zbase += spr->swr.pitch;
> +         sbase += spr->secondary.pitch;
>       }
>    }
> +}
> +
> +static void
> +swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)
> +{
> +   assert(transfer->resource);
> +
> +   struct swr_resource *spr = swr_resource(transfer->resource);
> +   /* if we're mapping the depth/stencil, copy in stencil for the section
> +    * being written out
> +    */
> +   if (transfer->usage & PIPE_TRANSFER_WRITE &&
> +       !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) &&
> +       spr->has_depth && spr->has_stencil) {
> +      struct pipe_box box;
> +      u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height,
> +               transfer->box.depth, &box);
> +      swr_transfer_flush_region(pipe, transfer, &box);
> +   }
> 
>    pipe_resource_reference(&transfer->resource, NULL);
>    FREE(transfer);
> @@ -425,8 +476,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
>    ctx->pipe.surface_destroy = swr_surface_destroy;
>    ctx->pipe.transfer_map = swr_transfer_map;
>    ctx->pipe.transfer_unmap = swr_transfer_unmap;
> +   ctx->pipe.transfer_flush_region = swr_transfer_flush_region;
> 
> -   ctx->pipe.transfer_flush_region = u_default_transfer_flush_region;
>    ctx->pipe.buffer_subdata = u_default_buffer_subdata;
>    ctx->pipe.texture_subdata = u_default_texture_subdata;
> 
> diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
> index 39378e6..ba10bd5 100644
> --- a/src/gallium/drivers/swr/swr_draw.cpp
> +++ b/src/gallium/drivers/swr/swr_draw.cpp
> @@ -282,7 +282,9 @@ swr_store_dirty_resource(struct pipe_context *pipe,
>       swr_draw_context *pDC = &ctx->swrDC;
>       SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
>       for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
> -         if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) {
> +         if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress ||
> +             (spr->secondary.pBaseAddress &&
> +              renderTargets[i].pBaseAddress == spr->secondary.pBaseAddress)) {
>             swr_store_render_target(pipe, i, post_tile_state);
> 
>             /* Mesa thinks depth/stencil are fused, so we'll never get an
> diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h
> index 00001e9..41abd77 100644
> --- a/src/gallium/drivers/swr/swr_resource.h
> +++ b/src/gallium/drivers/swr/swr_resource.h
> @@ -41,17 +41,13 @@ struct swr_resource {
>    bool has_depth;
>    bool has_stencil;
> 
> -   UINT alignedWidth;
> -   UINT alignedHeight;
> -
>    SWR_SURFACE_STATE swr;
>    SWR_SURFACE_STATE secondary; /* for faking depth/stencil merged formats */
> 
>    struct sw_displaytarget *display_target;
> 
> -   unsigned row_stride[PIPE_MAX_TEXTURE_LEVELS];
> -   unsigned img_stride[PIPE_MAX_TEXTURE_LEVELS];
> -   unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
> +   size_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
> +   size_t secondary_mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
> 
>    enum swr_resource_status status;
> };
> diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
> index accd6a2..73deb03 100644
> --- a/src/gallium/drivers/swr/swr_screen.cpp
> +++ b/src/gallium/drivers/swr/swr_screen.cpp
> @@ -44,6 +44,8 @@ extern "C" {
> 
> #include "jit_api.h"
> 
> +#include "memory/TilingFunctions.h"
> +
> #include <stdio.h>
> #include <map>
> 
> @@ -721,12 +723,14 @@ swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res)
>    struct sw_winsys *winsys = screen->winsys;
>    struct sw_displaytarget *dt;
> 
> +   const unsigned width = align(res->swr.width, res->swr.halign);
> +   const unsigned height = align(res->swr.height, res->swr.valign);
> +
>    UINT stride;
>    dt = winsys->displaytarget_create(winsys,
>                                      res->base.bind,
>                                      res->base.format,
> -                                     res->alignedWidth,
> -                                     res->alignedHeight,
> +                                     width, height,
>                                      64, NULL,
>                                      &stride);
> 
> @@ -740,14 +744,14 @@ swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res)
> 
>    /* Clear the display target surface */
>    if (map)
> -      memset(map, 0, res->alignedHeight * stride);
> +      memset(map, 0, height * stride);
> 
>    winsys->displaytarget_unmap(winsys, dt);
> 
>    return TRUE;
> }
> 
> -static boolean
> +static bool
> swr_texture_layout(struct swr_screen *screen,
>                    struct swr_resource *res,
>                    boolean allocate)
> @@ -763,87 +767,149 @@ swr_texture_layout(struct swr_screen *screen,
>    if (res->has_stencil && !res->has_depth)
>       fmt = PIPE_FORMAT_R8_UINT;
> 
> +   /* We always use the SWR layout. For 2D and 3D textures this looks like:
> +    *
> +    * |<------- pitch ------->|
> +    * +=======================+-------
> +    * |Array 0                |   ^
> +    * |                       |   |
> +    * |        Level 0        |   |
> +    * |                       |   |
> +    * |                       | qpitch
> +    * +-----------+-----------+   |
> +    * |           | L2L2L2L2  |   |
> +    * |  Level 1  | L3L3      |   |
> +    * |           | L4        |   v
> +    * +===========+===========+-------
> +    * |Array 1                |
> +    * |                       |
> +    * |        Level 0        |
> +    * |                       |
> +    * |                       |
> +    * +-----------+-----------+
> +    * |           | L2L2L2L2  |
> +    * |  Level 1  | L3L3      |
> +    * |           | L4        |
> +    * +===========+===========+
> +    *
> +    * The overall width in bytes is known as the pitch, while the overall
> +    * height in rows is the qpitch. Array slices are laid out logically below
> +    * one another, qpitch rows apart. For 3D surfaces, the "level" values are
> +    * just invalid for the higher array numbers (since depth is also
> +    * minified). 1D and 1D array surfaces are stored effectively the same way,
> +    * except that pitch never plays into it. All the levels are logically
> +    * adjacent to each other on the X axis.
> +    *
> +    * Each level's sizes are subject to the valign and halign settings of the
> +    * surface. For compressed formats that swr is unaware of, we will use an
> +    * appropriately-sized uncompressed format, and scale the widths/heights.
> +    *
> +    * This surface is stored inside res->swr. For depth/stencil textures,
> +    * res->secondary will have an identically-laid-out but R8_UINT-formatted
> +    * stencil tree. In the Z32F_S8 case, the primary surface still has 64-bpp
> +    * texels, to simplify map/unmap logic which copies the stencil values
> +    * in/out.
> +    */
> +
>    res->swr.width = pt->width0;
>    res->swr.height = pt->height0;
> -   res->swr.depth = pt->depth0;
>    res->swr.type = swr_convert_target_type(pt->target);
>    res->swr.tileMode = SWR_TILE_NONE;
>    res->swr.format = mesa_to_swr_format(fmt);
> -   res->swr.numSamples = (1 << pt->nr_samples);
> +   res->swr.numSamples = std::max(1u, pt->nr_samples);
> 
> -   SWR_FORMAT_INFO finfo = GetFormatInfo(res->swr.format);
> -
> -   size_t total_size = 0;
> -   unsigned width = pt->width0;
> -   unsigned height = pt->height0;
> -   unsigned depth = pt->depth0;
> -   unsigned layers = pt->array_size;
> -
> -   for (int level = 0; level <= pt->last_level; level++) {
> -      unsigned alignedWidth, alignedHeight;
> -      unsigned num_slices;
> +   if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) {
> +      res->swr.halign = KNOB_MACROTILE_X_DIM;
> +      res->swr.valign = KNOB_MACROTILE_Y_DIM;
> +   } else {
> +      res->swr.halign = 1;
> +      res->swr.valign = 1;
> +   }
> 
> -      if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) {
> -         alignedWidth = align(width, KNOB_MACROTILE_X_DIM);
> -         alignedHeight = align(height, KNOB_MACROTILE_Y_DIM);
> -      } else {
> -         alignedWidth = width;
> -         alignedHeight = height;
> +   // The pitch is the overall width of the texture in bytes. Most of the time
> +   // this is the pitch of level 0 since all the other levels fit underneath
> +   // it. However in some degenerate situations, the width of level1 + level2
> +   // may be larger. In that case, we use those widths. This can happen if,
> +   // e.g. halign is 32, and the width of level 0 is 32 or less. In that case,
> +   // the aligned levels 1 and 2 will also be 32 each, adding up to 64.
> +   unsigned width_blocks = util_format_get_nblocksx(fmt, pt->width0);
> +   unsigned width = align(width_blocks, res->swr.halign);
> +   if (pt->last_level > 1) {
> +      width = std::max<uint32_t>(
> +            width,
> +            align(u_minify(width_blocks, 1), res->swr.halign) +
> +            align(u_minify(width_blocks, 2), res->swr.halign));
> +   }
> +   res->swr.pitch = width * util_format_get_blocksize(fmt);
> +
> +
> +   // The qpitch is controlled by either the height of the second LOD, or the
> +   // combination of all the later LODs.
> +   unsigned height_blocks = util_format_get_nblocksy(fmt, pt->height0);
> +   res->swr.qpitch = align(height_blocks, res->swr.valign);
> +   if (pt->last_level == 1) {
> +      res->swr.qpitch += align(u_minify(height_blocks, 1), res->swr.valign);
> +   } else if (pt->last_level > 1) {
> +      unsigned height = u_minify(height_blocks, 1);
> +      unsigned level1 = align(height, res->swr.valign);
> +      unsigned level2 = 0;
> +      for (int level = 2; level <= pt->last_level; level++) {
> +         height = u_minify(height, 1);
> +         level2 += align(height, res->swr.valign);
>       }
> +      res->swr.qpitch += std::max(level1, level2);
> +   }
> 
> -      if (level == 0) {
> -         res->alignedWidth = alignedWidth;
> -         res->alignedHeight = alignedHeight;
> +   if (pt->target == PIPE_TEXTURE_3D)
> +      res->swr.depth = pt->depth0;
> +   else
> +      res->swr.depth = pt->array_size;
> +
> +   // Fix up swr format if necessary so that LOD offset computation works
> +   if (res->swr.format == (SWR_FORMAT)-1) {
> +      res->swr.width = util_format_get_nblocksx(fmt, res->swr.width);
> +      res->swr.height = util_format_get_nblocksy(fmt, res->swr.height);
> +      switch (util_format_get_blocksize(fmt)) {
> +      default:
> +         unreachable("Unexpected format block size");
> +      case 1: res->swr.format = R8_UINT; break;
> +      case 2: res->swr.format = R16_UINT; break;
> +      case 4: res->swr.format = R32_UINT; break;
> +      case 8: res->swr.format = R32G32_UINT; break;
> +      case 16: res->swr.format = R32G32B32A32_UINT; break;
>       }
> +   }
> 
> -      res->row_stride[level] = util_format_get_stride(fmt, alignedWidth);
> -      res->img_stride[level] =
> -         res->row_stride[level] * util_format_get_nblocksy(fmt, alignedHeight);
> -      res->mip_offsets[level] = total_size;
> -
> -      if (pt->target == PIPE_TEXTURE_3D)
> -         num_slices = depth;
> -      else if (pt->target == PIPE_TEXTURE_1D_ARRAY
> -               || pt->target == PIPE_TEXTURE_2D_ARRAY
> -               || pt->target == PIPE_TEXTURE_CUBE
> -               || pt->target == PIPE_TEXTURE_CUBE_ARRAY)
> -         num_slices = layers;
> -      else
> -         num_slices = 1;
> -
> -      total_size += res->img_stride[level] * num_slices;
> -      if (total_size > SWR_MAX_TEXTURE_SIZE)
> -         return FALSE;
> -
> -      width = u_minify(width, 1);
> -      height = u_minify(height, 1);
> -      depth = u_minify(depth, 1);
> +   for (int level = 0; level <= pt->last_level; level++) {
> +      res->mip_offsets[level] =
> +         ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->swr);
>    }
> 
> -   res->swr.halign = res->alignedWidth;
> -   res->swr.valign = res->alignedHeight;
> -   res->swr.pitch = res->row_stride[0];
> +   size_t total_size =
> +      (size_t)res->swr.depth * res->swr.qpitch * res->swr.pitch;
> +   if (total_size > SWR_MAX_TEXTURE_SIZE)
> +      return false;
> 
>    if (allocate) {
>       res->swr.pBaseAddress = (uint8_t *)AlignedMalloc(total_size, 64);
> 
>       if (res->has_depth && res->has_stencil) {
> -         SWR_FORMAT_INFO finfo = GetFormatInfo(res->secondary.format);
> -         res->secondary.width = pt->width0;
> -         res->secondary.height = pt->height0;
> -         res->secondary.depth = pt->depth0;
> -         res->secondary.type = SURFACE_2D;
> -         res->secondary.tileMode = SWR_TILE_NONE;
> +         res->secondary = res->swr;
>          res->secondary.format = R8_UINT;
> -         res->secondary.numSamples = (1 << pt->nr_samples);
> -         res->secondary.pitch = res->alignedWidth * finfo.Bpp;
> +         res->secondary.pitch = res->swr.pitch / util_format_get_blocksize(fmt);
> +
> +         for (int level = 0; level <= pt->last_level; level++) {
> +            res->secondary_mip_offsets[level] =
> +               ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->secondary);
> +         }
> 
>          res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc(
> -            res->alignedHeight * res->secondary.pitch, 64);
> +            res->secondary.depth * res->secondary.qpitch *
> +            res->secondary.pitch, 64);
>       }
>    }
> 
> -   return TRUE;
> +   return true;
> }
> 
> static boolean
> diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
> index 38a916e..f639df3 100644
> --- a/src/gallium/drivers/swr/swr_shader.cpp
> +++ b/src/gallium/drivers/swr/swr_shader.cpp
> @@ -34,6 +34,7 @@
> #include "builder.h"
> 
> #include "tgsi/tgsi_strings.h"
> +#include "util/u_format.h"
> #include "gallivm/lp_bld_init.h"
> #include "gallivm/lp_bld_flow.h"
> #include "gallivm/lp_bld_struct.h"
> @@ -41,6 +42,7 @@
> 
> #include "swr_context.h"
> #include "swr_context_llvm.h"
> +#include "swr_resource.h"
> #include "swr_state.h"
> #include "swr_screen.h"
> 
> @@ -85,18 +87,36 @@ swr_generate_sampler_key(const struct lp_tgsi_info &info,
>          info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
>       for (unsigned i = 0; i < key.nr_sampler_views; i++) {
>          if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
> +            const struct pipe_sampler_view *view =
> +               ctx->sampler_views[shader_type][i];
>             lp_sampler_static_texture_state(
> -               &key.sampler[i].texture_state,
> -               ctx->sampler_views[shader_type][i]);
> +               &key.sampler[i].texture_state, view);
> +            if (view) {
> +               struct swr_resource *swr_res = swr_resource(view->texture);
> +               const struct util_format_description *desc =
> +                  util_format_description(view->format);
> +               if (swr_res->has_depth && swr_res->has_stencil &&
> +                   !util_format_has_depth(desc))
> +                  key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
> +            }
>          }
>       }
>    } else {
>       key.nr_sampler_views = key.nr_samplers;
>       for (unsigned i = 0; i < key.nr_sampler_views; i++) {
>          if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
> +            const struct pipe_sampler_view *view =
> +               ctx->sampler_views[shader_type][i];
>             lp_sampler_static_texture_state(
> -               &key.sampler[i].texture_state,
> -               ctx->sampler_views[shader_type][i]);
> +               &key.sampler[i].texture_state, view);
> +            if (view) {
> +               struct swr_resource *swr_res = swr_resource(view->texture);
> +               const struct util_format_description *desc =
> +                  util_format_description(view->format);
> +               if (swr_res->has_depth && swr_res->has_stencil &&
> +                   !util_format_has_depth(desc))
> +                  key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
> +            }
>          }
>       }
>    }
> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
> index 783afba..2c7f3be 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -701,25 +701,46 @@ swr_update_texture_state(struct swr_context *ctx,
>    for (unsigned i = 0; i < num_sampler_views; i++) {
>       struct pipe_sampler_view *view =
>          ctx->sampler_views[shader_type][i];
> +      struct swr_jit_texture *jit_tex = &textures[i];
> 
> +      memset(jit_tex, 0, sizeof(*jit_tex));
>       if (view) {
>          struct pipe_resource *res = view->texture;
>          struct swr_resource *swr_res = swr_resource(res);
> -         struct swr_jit_texture *jit_tex = &textures[i];
> -         memset(jit_tex, 0, sizeof(*jit_tex));
> +         SWR_SURFACE_STATE *swr = &swr_res->swr;
> +         size_t *mip_offsets = swr_res->mip_offsets;
> +         if (swr_res->has_depth && swr_res->has_stencil &&
> +            !util_format_has_depth(util_format_description(view->format))) {
> +            swr = &swr_res->secondary;
> +            mip_offsets = swr_res->secondary_mip_offsets;
> +         }
> +
>          jit_tex->width = res->width0;
>          jit_tex->height = res->height0;
> -         jit_tex->depth = res->depth0;
> -         jit_tex->first_level = view->u.tex.first_level;
> -         jit_tex->last_level = view->u.tex.last_level;
> -         jit_tex->base_ptr = swr_res->swr.pBaseAddress;
> +         jit_tex->base_ptr = swr->pBaseAddress;
> +         if (view->target != PIPE_BUFFER) {
> +            jit_tex->first_level = view->u.tex.first_level;
> +            jit_tex->last_level = view->u.tex.last_level;
> +            if (view->target == PIPE_TEXTURE_3D)
> +               jit_tex->depth = res->depth0;
> +            else
> +               jit_tex->depth =
> +                  view->u.tex.last_layer - view->u.tex.first_layer + 1;
> +            jit_tex->base_ptr += view->u.tex.first_layer *
> +               swr->qpitch * swr->pitch;
> +         } else {
> +            unsigned view_blocksize = util_format_get_blocksize(view->format);
> +            jit_tex->base_ptr += view->u.buf.offset;
> +            jit_tex->width = view->u.buf.size / view_blocksize;
> +            jit_tex->depth = 1;
> +         }
> 
>          for (unsigned level = jit_tex->first_level;
>               level <= jit_tex->last_level;
>               level++) {
> -            jit_tex->row_stride[level] = swr_res->row_stride[level];
> -            jit_tex->img_stride[level] = swr_res->img_stride[level];
> -            jit_tex->mip_offsets[level] = swr_res->mip_offsets[level];
> +            jit_tex->row_stride[level] = swr->pitch;
> +            jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
> +            jit_tex->mip_offsets[level] = mip_offsets[level];
>          }
>       }
>    }
> @@ -789,6 +810,61 @@ swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
>    }
> }
> 
> +static bool
> +swr_change_rt(struct swr_context *ctx,
> +              unsigned attachment,
> +              const struct pipe_surface *sf)
> +{
> +   swr_draw_context *pDC = &ctx->swrDC;
> +   struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
> +
> +   /* Do nothing if the render target hasn't changed */
> +   if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr)
> +      return false;
> +
> +   /* Deal with disabling RT up front */
> +   if (!sf || !sf->texture) {
> +      /* If detaching attachment, mark tiles as RESOLVED so core
> +       * won't try to load from non-existent target. */
> +      swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
> +      *rt = {0};
> +      return true;
> +   }
> +
> +   const struct swr_resource *swr = swr_resource(sf->texture);
> +   const SWR_SURFACE_STATE *swr_surface = &swr->swr;
> +   SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
> +
> +   if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) {
> +      swr_surface = &swr->secondary;
> +      fmt = swr_surface->format;
> +   }
> +
> +   if (rt->pBaseAddress == swr_surface->pBaseAddress &&
> +       rt->format == fmt &&
> +       rt->lod == sf->u.tex.level &&
> +       rt->arrayIndex == sf->u.tex.first_layer)
> +      return false;
> +
> +   bool need_fence = false;
> +
> +   /* StoreTile for changed target */
> +   if (rt->pBaseAddress) {
> +      /* If changing attachment to a new target, mark tiles as
> +       * INVALID so they are reloaded from surface. */
> +      swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
> +      need_fence = true;
> +   }
> +
> +   /* Make new attachment */
> +   *rt = *swr_surface;
> +   rt->format = fmt;
> +   rt->lod = sf->u.tex.level;
> +   rt->arrayIndex = sf->u.tex.first_layer;
> +
> +   return need_fence;
> +}
> +
> void
> swr_update_derived(struct pipe_context *pipe,
>                    const struct pipe_draw_info *p_draw_info)
> @@ -807,64 +883,30 @@ swr_update_derived(struct pipe_context *pipe,
>    /* Render Targets */
>    if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
>       struct pipe_framebuffer_state *fb = &ctx->framebuffer;
> -      SWR_SURFACE_STATE *new_attachment[SWR_NUM_ATTACHMENTS] = {0};
> -      UINT i;
> +      const struct util_format_description *desc = NULL;
> +      bool need_fence = false;
> 
>       /* colorbuffer targets */
> -      if (fb->nr_cbufs)
> -         for (i = 0; i < fb->nr_cbufs; ++i)
> -            if (fb->cbufs[i]) {
> -               struct swr_resource *colorBuffer =
> -                  swr_resource(fb->cbufs[i]->texture);
> -               new_attachment[SWR_ATTACHMENT_COLOR0 + i] = &colorBuffer->swr;
> -            }
> -
> -      /* depth/stencil target */
> -      if (fb->zsbuf) {
> -         struct swr_resource *depthStencilBuffer =
> -            swr_resource(fb->zsbuf->texture);
> -         if (depthStencilBuffer->has_depth) {
> -            new_attachment[SWR_ATTACHMENT_DEPTH] = &depthStencilBuffer->swr;
> -
> -            if (depthStencilBuffer->has_stencil)
> -               new_attachment[SWR_ATTACHMENT_STENCIL] =
> -                  &depthStencilBuffer->secondary;
> -
> -         } else if (depthStencilBuffer->has_stencil)
> -            new_attachment[SWR_ATTACHMENT_STENCIL] = &depthStencilBuffer->swr;
> +      if (fb->nr_cbufs) {
> +         for (unsigned i = 0; i < fb->nr_cbufs; ++i)
> +            need_fence |= swr_change_rt(
> +                  ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
>       }
> +      for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
> +         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
> 
> -      /* Make the attachment updates */
> -      swr_draw_context *pDC = &ctx->swrDC;
> -      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
> -      unsigned need_fence = FALSE;
> -      for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) {
> -         void *new_base = nullptr;
> -         if (new_attachment[i])
> -            new_base = new_attachment[i]->pBaseAddress;
> -
> -         /* StoreTile for changed target */
> -         if (renderTargets[i].pBaseAddress != new_base) {
> -            if (renderTargets[i].pBaseAddress) {
> -               /* If changing attachment to a new target, mark tiles as
> -                * INVALID so they are reloaded from surface.
> -                * If detaching attachment, mark tiles as RESOLVED so core
> -                * won't try to load from non-existent target. */
> -               enum SWR_TILE_STATE post_state = (new_attachment[i]
> -                  ? SWR_TILE_INVALID : SWR_TILE_RESOLVED);
> -               swr_store_render_target(pipe, i, post_state);
> -
> -               need_fence |= TRUE;
> -            }
> +      /* depth/stencil target */
> +      if (fb->zsbuf)
> +         desc = util_format_description(fb->zsbuf->format);
> +      if (fb->zsbuf && util_format_has_depth(desc))
> +         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
> +      else
> +         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
> 
> -            /* Make new attachment */
> -            if (new_attachment[i])
> -               renderTargets[i] = *new_attachment[i];
> -            else
> -               if (renderTargets[i].pBaseAddress)
> -                  renderTargets[i] = {0};
> -         }
> -      }
> +      if (fb->zsbuf && util_format_has_stencil(desc))
> +         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
> +      else
> +         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
> 
>       /* This fence ensures any attachment changes are resolved before the
>        * next draw */
> -- 
> 2.7.3
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list