[Mesa-dev] [PATCH v2 29/35] i965/blorp: Only do offset hacks for fake W-tiling and IMS

Pohjolainen, Topi topi.pohjolainen at intel.com
Thu Jul 28 12:59:11 UTC 2016


On Tue, Jul 26, 2016 at 03:02:20PM -0700, Jason Ekstrand wrote:
> Since the dawn of time, blorp has used offsets directly to get at different
> mip levels and array slices of surfaces.  This isn't really necessary since
> we can just use the base level/layer provided in the surface state.  While
> it may have simplified blorp's original design, we haven't been using the
> blorp path for surface state on gen8 thanks to render compression and
> there's really no good need for it most of the time.  This commit restricts
> such surface munging to the cases of fake W-tiling and fake interleaved
> multisampling.
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c        |  74 ++---------
>  src/mesa/drivers/dri/i965/brw_blorp.h        |   6 +
>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 185 +++++++++++++++++++--------
>  3 files changed, 152 insertions(+), 113 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 64e507a..215f765 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -90,7 +90,7 @@ get_image_offset_sa_gen6_stencil(const struct isl_surf *surf,
>     *y_offset_sa = y;
>  }
>  
> -static void
> +void

I noticed that this is now only used in brw_blorp_blit.cpp, should we just
move it there instead?

>  blorp_get_image_offset_sa(struct isl_device *dev, const struct isl_surf *surf,
>                            uint32_t level, uint32_t layer,
>                            uint32_t *x_offset_sa,
> @@ -100,60 +100,11 @@ blorp_get_image_offset_sa(struct isl_device *dev, const struct isl_surf *surf,
>        get_image_offset_sa_gen6_stencil(surf, level, layer,
>                                         x_offset_sa, y_offset_sa);
>     } else {
> -      /* Using base_array_layer for Z in 3-D surfaces is a bit abusive, but it
> -       * will go away soon enough.
> -       */
> -      uint32_t z = 0;
> -      if (surf->dim == ISL_SURF_DIM_3D) {
> -         z = layer;
> -         layer = 0;
> -      }
> -
> -      isl_surf_get_image_offset_sa(surf, level, layer, z,
> +      isl_surf_get_image_offset_sa(surf, level, layer, 0,
>                                     x_offset_sa, y_offset_sa);
>     }
>  }
>  
> -static void
> -surf_apply_level_layer_offsets(struct isl_device *dev, struct isl_surf *surf,
> -                               struct isl_view *view, uint32_t *byte_offset,
> -                               uint32_t *tile_x_sa, uint32_t *tile_y_sa)
> -{
> -   /* This only makes sense for a single level and array slice */
> -   assert(view->levels == 1 && view->array_len == 1);
> -
> -   uint32_t x_offset_sa, y_offset_sa;
> -   blorp_get_image_offset_sa(dev, surf, view->base_level,
> -                             view->base_array_layer,
> -                             &x_offset_sa, &y_offset_sa);
> -
> -   isl_tiling_get_intratile_offset_sa(dev, surf->tiling, view->format,
> -                                      surf->row_pitch, x_offset_sa, y_offset_sa,
> -                                      byte_offset, tile_x_sa, tile_y_sa);
> -
> -   /* Now that that's done, we have a very bare 2-D surface */
> -   surf->dim = ISL_SURF_DIM_2D;
> -   surf->dim_layout = ISL_DIM_LAYOUT_GEN4_2D;
> -
> -   surf->logical_level0_px.width =
> -      minify(surf->logical_level0_px.width, view->base_level);
> -   surf->logical_level0_px.height =
> -      minify(surf->logical_level0_px.height, view->base_level);
> -   surf->logical_level0_px.depth = 1;
> -   surf->logical_level0_px.array_len = 1;
> -   surf->levels = 1;
> -
> -   /* Alignment doesn't matter since we have 1 miplevel and 1 array slice so
> -    * just pick something that works for everybody.
> -    */
> -   surf->image_alignment_el = isl_extent3d(4, 4, 1);
> -
> -   /* TODO: surf->physcal_level0_extent_sa? */

I was wondering about this in the introducing patch...

> -
> -   view->base_level = 0;
> -   view->base_array_layer = 0;
> -}
> -
>  void
>  brw_blorp_surface_info_init(struct brw_context *brw,
>                              struct brw_blorp_surface_info *info,
> @@ -191,8 +142,6 @@ brw_blorp_surface_info_init(struct brw_context *brw,
>        .format = ISL_FORMAT_UNSUPPORTED, /* Set later */
>        .base_level = level,

This still prevents surf_convert_to_single_slice() from skipping the munging,
I think. Shouldn't we drop it also?

>        .levels = 1,
> -      .base_array_layer = layer / layer_multiplier,
> -      .array_len = 1,
>        .channel_select = {
>           ISL_CHANNEL_SELECT_RED,
>           ISL_CHANNEL_SELECT_GREEN,
> @@ -201,12 +150,21 @@ brw_blorp_surface_info_init(struct brw_context *brw,
>        },
>     };
>  
> -   if (brw->gen >= 8 && !is_render_target && info->surf.dim == ISL_SURF_DIM_3D) {
> -      /* On gen8+ we use actual 3-D textures so we need to pass the layer
> -       * through to the sampler.
> +   if (!is_render_target &&
> +       (info->surf.dim == ISL_SURF_DIM_3D ||
> +        info->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY)) {
> +      /* 3-D textures don't support base_array layer and neither do 2-D
> +       * multisampled textures on IVB so we need to pass it through the
> +       * sampler in those cases.  These are also two cases where we are
> +       * guaranteed that we won't be doing any funny surface hacks.
>         */
> +      info->view.base_array_layer = 0;
> +      info->view.array_len = MAX2(info->surf.logical_level0_px.depth,
> +                                  info->surf.logical_level0_px.array_len);
>        info->z_offset = layer / layer_multiplier;
>     } else {
> +      info->view.base_array_layer = layer / layer_multiplier;
> +      info->view.array_len = 1;
>        info->z_offset = 0;
>     }
>  
> @@ -252,10 +210,6 @@ brw_blorp_surface_info_init(struct brw_context *brw,
>        break;
>     }
>     }
> -
> -   surf_apply_level_layer_offsets(&brw->isl_dev, &info->surf, &info->view,
> -                                  &info->bo_offset,
> -                                  &info->tile_x_sa, &info->tile_y_sa);
>  }
>  
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
> index ec12dfe..706d53e 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -338,6 +338,12 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
>                               struct brw_blorp_prog_data *prog_data,
>                               unsigned *program_size);
>  
> +void
> +blorp_get_image_offset_sa(struct isl_device *dev, const struct isl_surf *surf,
> +                          uint32_t level, uint32_t layer,
> +                          uint32_t *x_offset_sa,
> +                          uint32_t *y_offset_sa);
> +
>  uint32_t
>  brw_blorp_emit_surface_state(struct brw_context *brw,
>                               const struct brw_blorp_surface_info *surface,
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index a35cdb3..007c061 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -1586,6 +1586,115 @@ swizzle_to_scs(GLenum swizzle)
>     return (enum isl_channel_select)((swizzle + 4) & 7);
>  }
>  
> +static void
> +surf_convert_to_single_slice(struct brw_context *brw,
> +                             struct brw_blorp_surface_info *info)
> +{
> +   /* This only makes sense for a single level and array slice */
> +   assert(info->view.levels == 1 && info->view.array_len == 1);
> +
> +   /* Just bail if we have nothing to do. */
> +   if (info->surf.dim == ISL_SURF_DIM_2D &&
> +       info->view.base_level == 0 && info->view.base_array_layer == 0 &&
> +       info->surf.levels == 0 && info->surf.logical_level0_px.array_len == 0)
> +      return;
> +
> +   uint32_t x_offset_sa, y_offset_sa;
> +   blorp_get_image_offset_sa(&brw->isl_dev, &info->surf, info->view.base_level,
> +                             info->view.base_array_layer,
> +                             &x_offset_sa, &y_offset_sa);
> +
> +   isl_tiling_get_intratile_offset_sa(&brw->isl_dev, info->surf.tiling,
> +                                      info->view.format, info->surf.row_pitch,
> +                                      x_offset_sa, y_offset_sa,
> +                                      &info->bo_offset,
> +                                      &info->tile_x_sa, &info->tile_y_sa);
> +
> +   /* TODO: Once this file gets converted to C, we shouls just use designated
> +    * initializers.
> +    */
> +   struct isl_surf_init_info init_info = isl_surf_init_info();
> +
> +   init_info.dim = ISL_SURF_DIM_2D;
> +   init_info.format = ISL_FORMAT_R8_UINT;
> +   init_info.width =
> +      minify(info->surf.logical_level0_px.width, info->view.base_level);
> +   init_info.height =
> +      minify(info->surf.logical_level0_px.height, info->view.base_level);
> +   init_info.depth = 1;
> +   init_info.levels = 1;
> +   init_info.array_len = 1;
> +   init_info.samples = info->surf.samples;
> +   init_info.min_pitch = info->surf.row_pitch;
> +   init_info.usage = info->surf.usage;
> +   init_info.tiling_flags = 1 << info->surf.tiling;
> +
> +   isl_surf_init_s(&brw->isl_dev, &info->surf, &init_info);
> +   assert(info->surf.row_pitch == init_info.min_pitch);
> +
> +   /* The view is also different now. */
> +   info->view.base_level = 0;
> +   info->view.levels = 1;
> +   info->view.base_array_layer = 0;
> +   info->view.array_len = 1;
> +}
> +
> +static void
> +surf_fake_interleaved_msaa(struct brw_context *brw,
> +                           struct brw_blorp_surface_info *info)
> +{
> +   assert(info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
> +
> +   /* First, we need to convert it to a simple 1-level 1-layer 2-D surface */
> +   surf_convert_to_single_slice(brw, info);
> +
> +   info->surf.logical_level0_px = info->surf.phys_level0_sa;
> +   info->surf.samples = 1;
> +   info->surf.msaa_layout = ISL_MSAA_LAYOUT_NONE;
> +}
> +
> +static void
> +surf_retile_w_to_y(struct brw_context *brw,
> +                   struct brw_blorp_surface_info *info)
> +{
> +   assert(info->surf.tiling == ISL_TILING_W);
> +
> +   /* First, we need to convert it to a simple 1-level 1-layer 2-D surface */
> +   surf_convert_to_single_slice(brw, info);
> +
> +   /* On gen7+, we don't have interleaved multisampling for color render
> +    * targets so we have to fake it.
> +    *
> +    * TODO: Are we sure we don't also need to fake it on gen6?
> +    */
> +   if (brw->gen > 6 && info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
> +      info->surf.logical_level0_px = info->surf.phys_level0_sa;
> +      info->surf.samples = 1;
> +      info->surf.msaa_layout = ISL_MSAA_LAYOUT_NONE;
> +   }
> +
> +   if (brw->gen == 6) {
> +      /* Gen6 stencil buffers have a very large alignment coming in from the
> +       * miptree.  It's out-of-bounds for what the surface state can handle.
> +       * Since we have a single layer and level, it doesn't really matter as
> +       * long as we don't pass a bogus value into isl_surf_fill_state().
> +       */
> +      info->surf.image_alignment_el = isl_extent3d(4, 2, 1);
> +   }
> +
> +   /* Now that we've converted everything to a simple 2-D surface with only
> +    * one miplevel, we can go about retiling it.
> +    */
> +   const unsigned x_align = 8, y_align = info->surf.samples != 0 ? 8 : 4;
> +   info->surf.tiling = ISL_TILING_Y0;
> +   info->surf.logical_level0_px.width =
> +      ALIGN(info->surf.logical_level0_px.width, x_align) * 2;
> +   info->surf.logical_level0_px.height =
> +      ALIGN(info->surf.logical_level0_px.height, y_align) / 2;
> +   info->tile_x_sa *= 2;
> +   info->tile_y_sa /= 2;
> +}
> +
>  /**
>   * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
>   * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
> @@ -1782,7 +1891,10 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>     /* For some texture types, we need to pass the layer through the sampler. */
>     params.wm_inputs.src_z = params.src.z_offset;
>  
> -   if (brw->gen > 6 && dst_mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
> +   if (brw->gen > 6 &&
> +       params.dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
> +      assert(params.dst.surf.samples > 1);
> +
>        /* We must expand the rectangle we send through the rendering pipeline,
>         * to account for the fact that we are mapping the destination region as
>         * single-sampled when it is in fact multisampled.  We must also align
> @@ -1795,71 +1907,41 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>         * If it's UMS, then we have no choice but to set up the rendering
>         * pipeline as multisampled.
>         */
> -      assert(params.dst.surf.msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED);
>        switch (params.dst.surf.samples) {
>        case 2:
>           params.x0 = ROUND_DOWN_TO(params.x0 * 2, 4);
>           params.y0 = ROUND_DOWN_TO(params.y0, 4);
>           params.x1 = ALIGN(params.x1 * 2, 4);
>           params.y1 = ALIGN(params.y1, 4);
> -         params.dst.surf.logical_level0_px.width *= 2;
>           break;
>        case 4:
>           params.x0 = ROUND_DOWN_TO(params.x0 * 2, 4);
>           params.y0 = ROUND_DOWN_TO(params.y0 * 2, 4);
>           params.x1 = ALIGN(params.x1 * 2, 4);
>           params.y1 = ALIGN(params.y1 * 2, 4);
> -         params.dst.surf.logical_level0_px.width *= 2;
> -         params.dst.surf.logical_level0_px.height *= 2;
>           break;
>        case 8:
>           params.x0 = ROUND_DOWN_TO(params.x0 * 4, 8);
>           params.y0 = ROUND_DOWN_TO(params.y0 * 2, 4);
>           params.x1 = ALIGN(params.x1 * 4, 8);
>           params.y1 = ALIGN(params.y1 * 2, 4);
> -         params.dst.surf.logical_level0_px.width *= 4;
> -         params.dst.surf.logical_level0_px.height *= 2;
>           break;
>        case 16:
>           params.x0 = ROUND_DOWN_TO(params.x0 * 4, 8);
>           params.y0 = ROUND_DOWN_TO(params.y0 * 4, 8);
>           params.x1 = ALIGN(params.x1 * 4, 8);
>           params.y1 = ALIGN(params.y1 * 4, 8);
> -         params.dst.surf.logical_level0_px.width *= 4;
> -         params.dst.surf.logical_level0_px.height *= 4;
>           break;
>        default:
>           unreachable("Unrecognized sample count in brw_blorp_blit_params ctor");
>        }
>  
> -      /* Gen7's rendering hardware only supports the IMS layout for depth and
> -       * stencil render targets.  Blorp always maps its destination surface as
> -       * a color render target (even if it's actually a depth or stencil
> -       * buffer).  So if the destination is IMS, we'll have to map it as a
> -       * single-sampled texture and interleave the samples ourselves.
> -       */
> -      params.dst.surf.samples = 1;
> -      params.dst.surf.msaa_layout = ISL_MSAA_LAYOUT_NONE;
> +      surf_fake_interleaved_msaa(brw, &params.dst);
>  
>        wm_prog_key.use_kill = true;
>     }
>  
>     if (params.dst.surf.tiling == ISL_TILING_W) {
> -      /* We need to fake W-tiling with Y-tiling */
> -      params.dst.surf.tiling = ISL_TILING_Y0;
> -
> -      wm_prog_key.dst_tiled_w = true;

This is just moved further down, right?

> -
> -      if (params.dst.surf.samples > 1) {
> -         /* If the destination surface is a W-tiled multisampled stencil
> -          * buffer that we're mapping as Y tiled, then we need to arrange for
> -          * the WM program to run once per sample rather than once per pixel,
> -          * because the memory layout of related samples doesn't match between
> -          * W and Y tiling.
> -          */
> -         wm_prog_key.persample_msaa_dispatch = true;
> -      }
> -
>        /* We must modify the rectangle we send through the rendering pipeline
>         * (and the size and x/y offset of the destination surface), to account
>         * for the fact that we are mapping it as Y-tiled when it is in fact
> @@ -1911,39 +1993,36 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>        params.y0 = ROUND_DOWN_TO(params.y0, y_align) / 2;
>        params.x1 = ALIGN(params.x1, x_align) * 2;
>        params.y1 = ALIGN(params.y1, y_align) / 2;
> -      params.dst.surf.logical_level0_px.width =
> -         ALIGN(params.dst.surf.logical_level0_px.width, x_align) * 2;
> -      params.dst.surf.logical_level0_px.height =
> -         ALIGN(params.dst.surf.logical_level0_px.height, y_align) / 2;
> -      params.dst.tile_x_sa *= 2;
> -      params.dst.tile_y_sa /= 2;
> +
> +      /* Retile the surface to Y-tiled */
> +      surf_retile_w_to_y(brw, &params.dst);
> +
> +      wm_prog_key.dst_tiled_w = true;
>        wm_prog_key.use_kill = true;
> +
> +      if (params.dst.surf.samples > 1) {
> +         /* If the destination surface is a W-tiled multisampled stencil
> +          * buffer that we're mapping as Y tiled, then we need to arrange for
> +          * the WM program to run once per sample rather than once per pixel,
> +          * because the memory layout of related samples doesn't match between
> +          * W and Y tiling.
> +          */
> +         wm_prog_key.persample_msaa_dispatch = true;
> +      }
>     }
>  
>     if (brw->gen < 8 && params.src.surf.tiling == ISL_TILING_W) {
>        /* On Haswell and earlier, we have to fake W-tiled sources as Y-tiled.
>         * Broadwell adds support for sampling from stencil.
> -       */
> -      params.src.surf.tiling = ISL_TILING_Y0;
> -
> -      wm_prog_key.src_tiled_w = true;

Same as this?

> -
> -      /* We must modify the size and x/y offset of the source surface to
> -       * account for the fact that we are mapping it as Y-tiled when it is in
> -       * fact W tiled.
>         *
>         * See the comments above concerning x/y offset alignment for the
>         * destination surface.
>         *
>         * TODO: what if this makes the texture size too large?
>         */
> -      const unsigned x_align = 8, y_align = params.src.surf.samples != 0 ? 8 : 4;
> -      params.src.surf.logical_level0_px.width =
> -         ALIGN(params.src.surf.logical_level0_px.width, x_align) * 2;
> -      params.src.surf.logical_level0_px.height =
> -         ALIGN(params.src.surf.logical_level0_px.height, y_align) / 2;
> -      params.src.tile_x_sa *= 2;
> -      params.src.tile_y_sa /= 2;
> +      surf_retile_w_to_y(brw, &params.src);
> +
> +      wm_prog_key.src_tiled_w = true;
>     }
>  
>     /* tex_samples and rt_samples are the sample counts that are set up in
> -- 
> 2.5.0.400.gff86faf
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list