[Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping

Nanley Chery nanleychery at gmail.com
Wed Mar 14 17:14:15 UTC 2018


On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote:
> Rename the (un)map_gtt functions to (un)map_map (map by
> returning a map) and add new functions (un)map_tiled_memcpy that
> return a shadow buffer populated with the intel_tiled_memcpy
> functions.
>
> Tiling/detiling with the cpu will be the only way to handle Yf/Ys
> tiling, when support is added for those formats.
>
> v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
>
> v3: Add units to parameter names of tile_extents (Nanley Chery)
>     Use _mesa_align_malloc for the shadow copy (Nanley)
>     Continue using gtt maps on gen4 (Nanley)
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 ++++++++++++++++++++++++---
>  1 file changed, 86 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index c6213b21629..fba17bf5b7b 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -31,6 +31,7 @@
>  #include "intel_image.h"
>  #include "intel_mipmap_tree.h"
>  #include "intel_tex.h"
> +#include "intel_tiled_memcpy.h"
>  #include "intel_blit.h"
>  #include "intel_fbo.h"
>  
> @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
>  }
>  
>  static void
> -intel_miptree_map_gtt(struct brw_context *brw,
> -      struct intel_mipmap_tree *mt,
> -      struct intel_miptree_map *map,
> -      unsigned int level, unsigned int slice)
> +intel_miptree_map_map(struct brw_context *brw,
> +                      struct intel_mipmap_tree *mt,
> +                      struct intel_miptree_map *map,
> +                      unsigned int level, unsigned int slice)
>  {
>     unsigned int bw, bh;
>     void *base;
> @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw,
>  }
>  
>  static void
> -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
>  {
>     intel_miptree_unmap_raw(mt);
>  }
>  
> +/* Compute extent parameters for use with tiled_memcpy functions.
> + * xs are in units of bytes and ys are in units of strides. */
> +static inline void
> +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> +             unsigned int level, unsigned int slice, unsigned int *x1_B,
> +             unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
> +{
> +   unsigned int block_width, block_height;
> +   unsigned int x0_el, y0_el;
> +
> +   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
> +
> +   assert(map->x % block_width == 0);
> +   assert(map->y % block_height == 0);
> +
> +   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
> +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
> +   *y1_el = map->y / block_height + y0_el;
> +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
> +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
> +}
> +
> +static void
> +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> +                               struct intel_mipmap_tree *mt,
> +                               struct intel_miptree_map *map,
> +                               unsigned int level, unsigned int slice)
> +{
> +   unsigned int x1, x2, y1, y2;
> +   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> +   map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 16);
> +
> +   assert(map->ptr);
> +
> +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {

It looks like we'll generate extra copies using this function, but only
in a few corner cases. I think the following places should be using the
INVALIDATE flag, but aren't:
* _mesa_store_cleartexsubimage
* generate_mipmap_uncompressed

> +      char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +      src += mt->offset;
> +

It seems possible that the buffer object had a WC memory type during
rendering. In that case, we need an sfence here right?

This stuff is pretty new to me, so perhaps others would like to chime
in.

> +      tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
> +                      mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling,
> +                      memcpy);
> +
> +      intel_miptree_unmap_raw(mt);
> +   }
> +}
> +
> +static void
> +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
> +                                 struct intel_mipmap_tree *mt,
> +                                 struct intel_miptree_map *map,
> +                                 unsigned int level,
> +                                 unsigned int slice)
> +{
> +   if (map->mode & GL_MAP_WRITE_BIT) {
> +      unsigned int x1, x2, y1, y2;
> +      tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> +
> +      char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +      dst += mt->offset;
> +
> +      linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
> +                      map->stride, brw->has_swizzling, mt->surf.tiling, memcpy);

On non-LLC platforms, the bo is mapped WC. Given the weak ordering of
memory operations, the GPU may not see the stored data. Shouldn't we
call sfence for these platforms?

This quote from the Section 11.3.1 of the SDM (Vol3A) seems to support this:

   When using the WC memory type, software must be sensitive to the fact
   that the writing of data to system memory is being delayed and must
   deliberately empty the WC buffers when system memory coherency is
   required.

> +
> +      intel_miptree_unmap_raw(mt);
> +   }
> +   _mesa_align_free(map->buffer);
> +   map->buffer = map->ptr = NULL;
> +}
> +
>  static void
>  intel_miptree_map_blit(struct brw_context *brw,
>         struct intel_mipmap_tree *mt,
> @@ -3655,8 +3726,10 @@ intel_miptree_map(struct brw_context *brw,
>                (mt->surf.row_pitch % 16 == 0)) {
>        intel_miptree_map_movntdqa(brw, mt, map, level, slice);
>  #endif
> +   } else if (mt->surf.tiling != ISL_TILING_LINEAR && brw->screen->devinfo.gen > 4) {

This line and the one like it below have crossed the 80 char-limit.

> +      intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
>     } else {
> -      intel_miptree_map_gtt(brw, mt, map, level, slice);
> +      intel_miptree_map_map(brw, mt, map, level, slice);
>     }
>  
>     *out_ptr = map->ptr;
> @@ -3692,11 +3765,16 @@ intel_miptree_unmap(struct brw_context *brw,
>     } else if (map->linear_mt) {
>        intel_miptree_unmap_blit(brw, mt, map, level, slice);
>  #if defined(USE_SSE41)
> -   } else if (map->buffer && cpu_has_sse4_1) {
> +   } else if (!(map->mode & GL_MAP_WRITE_BIT) &&
> +              !mt->compressed && cpu_has_sse4_1 &&
> +              (mt->surf.row_pitch % 16 == 0) &&
> +              map->buffer) {
>        intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
>  #endif
> +   } else if (mt->surf.tiling != ISL_TILING_LINEAR && brw->screen->devinfo.gen > 4) {

-Nanley

> +      intel_miptree_unmap_tiled_memcpy(brw, mt, map, level, slice);
>     } else {
> -      intel_miptree_unmap_gtt(mt);
> +      intel_miptree_unmap_map(mt);
>     }
>  
>     intel_miptree_release_map(mt, level, slice);
> --
> 2.14.3
>


More information about the mesa-dev mailing list