[Mesa-dev] [PATCH 3/8] i965: Faking the ETC2 compression on Gen < 8 GPUs using two miptrees.

Nanley Chery nanleychery at gmail.com
Fri Jan 18 23:32:02 UTC 2019


On Mon, Nov 19, 2018 at 10:54:07AM +0200, Eleni Maria Stea wrote:
> GPUs Gen < 8 cannot render ETC2 formats. So far, they converted the
> compressed EAC/ETC2 images to non-compressed RGB format images that they
> can render. When GetCompressed* functions were called, the pixels were
> returned in the RGB format and not the compressed format as expected.
> 
> Trying to fix this problem, we use the shadow miptree to store the
> decompressed data for the rendering and the main miptree to store the
> compressed. We use the BRW_MAP_ETC_BIT as a flag to indicate when we
> use the fake compression in order to map the main tree with the
> compressed data. The functions that upload the compressed data as well
> as the mapping/unmapping functions are now updated to use this flag.

Did you mean sample instead of render?

> ---
>  .../drivers/dri/i965/brw_wm_surface_state.c   | 26 +++++-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 73 +++++++++++++--
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 17 ++++
>  src/mesa/drivers/dri/i965/intel_tex_image.c   | 45 ++++++++-
>  src/mesa/main/texstore.c                      | 92 +++++++++++--------
>  src/mesa/main/texstore.h                      |  9 ++
>  6 files changed, 204 insertions(+), 58 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index e214fae140..4d1eafac91 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -329,6 +329,17 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
>  {
>     const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
>  
> +   struct brw_context *brw = brw_context((struct gl_context *)ctx);
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   bool is_fake_etc = _mesa_is_format_etc2(img->TexFormat) &&
> +                      devinfo->gen < 8;
> +
> +   mesa_format format;
> +   if (is_fake_etc)
> +      format = intel_lower_compressed_format(brw, img->TexFormat);
> +   else
> +      format = img->TexFormat;
> +

Why is modifying this function necessary?

>     int swizzles[SWIZZLE_NIL + 1] = {
>        SWIZZLE_X,
>        SWIZZLE_Y,
> @@ -381,7 +392,7 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
>        }
>     }
>  
> -   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
> +   GLenum datatype = _mesa_get_format_datatype(format);
>  
>     /* If the texture's format is alpha-only, force R, G, and B to
>      * 0.0. Similarly, if the texture's format has no alpha channel,
> @@ -422,9 +433,9 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
>     case GL_RED:
>     case GL_RG:
>     case GL_RGB:
> -      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
> -          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
> -          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
> +      if (_mesa_get_format_bits(format, GL_ALPHA_BITS) > 0 ||
> +          format == MESA_FORMAT_RGB_DXT1 ||
> +          format == MESA_FORMAT_SRGB_DXT1)
>           swizzles[3] = SWIZZLE_ONE;
>        break;
>     }
> @@ -474,6 +485,11 @@ static void brw_update_texture_surface(struct gl_context *ctx,
>        struct intel_texture_object *intel_obj = intel_texture_object(obj);
>        struct intel_mipmap_tree *mt = intel_obj->mt;
>  
> +      if (mt->needs_fake_etc) {
> +         assert(mt->shadow_mt);
> +         mt = mt->shadow_mt;
> +      }
> +
>        if (plane > 0) {
>           if (mt->plane[plane - 1] == NULL)
>              return;
> @@ -512,7 +528,7 @@ static void brw_update_texture_surface(struct gl_context *ctx,
>            * is safe because texture views aren't allowed on depth/stencil.
>            */
>           mesa_fmt = mt->format;
> -      } else if (mt->etc_format != MESA_FORMAT_NONE) {
> +      } else if (intel_obj->mt->etc_format != MESA_FORMAT_NONE) {
>           mesa_fmt = mt->format;

For uniformity, lets access mt->shadow_mt->format here and move the
mt->needs_fake_etc check from above to below this condition:

	} else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {


>        } else if (plane > 0) {
>           mesa_fmt = mt->format;
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 0e67e4d8f3..b24332ff67 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -689,6 +689,8 @@ miptree_create(struct brw_context *brw,
>     if (devinfo->gen < 6 && _mesa_is_format_color_format(format))
>        tiling_flags &= ~ISL_TILING_Y0_BIT;
>  
> +   bool fakes_etc_compression = devinfo->gen < 8 && _mesa_is_format_etc2(format);
> +
>     mesa_format mt_fmt;
>     if (_mesa_is_format_color_format(format)) {
>        mt_fmt = intel_lower_compressed_format(brw, format);

Why not reserve calling intel_lower_compressed_format() for the case in
which we're creating a fake ETC miptree? Leaving this makes things
unnecessarily complex.

> @@ -700,18 +702,41 @@ miptree_create(struct brw_context *brw,
>                 intel_depth_format_for_depthstencil_format(format);
>     }
>  
> +   mesa_format fmt = fakes_etc_compression ? format : mt_fmt;
>     struct intel_mipmap_tree *mt =
> -      make_surface(brw, target, mt_fmt, first_level, last_level,
> +      make_surface(brw, target, fmt, first_level, last_level,
>                     width0, height0, depth0, num_samples,
> -                   tiling_flags, mt_surf_usage(mt_fmt),
> +                   tiling_flags, mt_surf_usage(fmt),
>                     alloc_flags, 0, NULL);
>  
>     if (mt == NULL)
>        return NULL;
>  
> +   mt->needs_fake_etc = fakes_etc_compression;
> +   if (mt->needs_fake_etc) {
> +      mt->etc_format = format;
> +      mt->shadow_mt = make_surface(brw, target, mt_fmt, first_level,
> +                                   last_level, width0, height0, depth0,
> +                                   num_samples, tiling_flags,
> +                                   mt_surf_usage(mt_fmt),
> +                                   alloc_flags, 0, NULL);
> +      if (mt->shadow_mt == NULL) {
> +         intel_miptree_release(&mt);
> +         return NULL;
> +      }
> +
> +      mt->shadow_mt->etc_format = MESA_FORMAT_NONE;
> +      if (!(flags & MIPTREE_CREATE_NO_AUX))
> +         intel_miptree_choose_aux_usage(brw, mt->shadow_mt);

I don't think we'll use an auxiliary buffer with this miptree.

> +   } else {
> +      mt->etc_format = (_mesa_is_format_color_format(format) &&
> +                        mt_fmt != format) ? format : MESA_FORMAT_NONE;

Won't this always resolve to MESA_FORMAT_NONE?

> +   }
> +
>     if (needs_separate_stencil(brw, mt, format)) {
>        mt->stencil_mt =
> -         make_surface(brw, target, MESA_FORMAT_S_UINT8, first_level, last_level,
> +         make_surface(brw, target, MESA_FORMAT_S_UINT8, first_level,
> +                      last_level,

This change is unrelated.

>                        width0, height0, depth0, num_samples,
>                        ISL_TILING_W_BIT, mt_surf_usage(MESA_FORMAT_S_UINT8),
>                        alloc_flags, 0, NULL);
> @@ -721,9 +746,6 @@ miptree_create(struct brw_context *brw,
>        }
>     }
>  
> -   mt->etc_format = (_mesa_is_format_color_format(format) && mt_fmt != format) ?
> -                    format : MESA_FORMAT_NONE;
> -
>     if (!(flags & MIPTREE_CREATE_NO_AUX))
>        intel_miptree_choose_aux_usage(brw, mt);
>  
> @@ -3700,7 +3722,7 @@ use_intel_mipree_map_blit(struct brw_context *brw,
>   */
>  void
>  intel_miptree_map(struct brw_context *brw,
> -                  struct intel_mipmap_tree *mt,
> +                  struct intel_mipmap_tree *miptree,
>                    unsigned int level,
>                    unsigned int slice,
>                    unsigned int x,
> @@ -3713,9 +3735,31 @@ intel_miptree_map(struct brw_context *brw,
>  {
>     const struct gen_device_info *devinfo = &brw->screen->devinfo;
>     struct intel_miptree_map *map;
> +   struct intel_mipmap_tree *mt = miptree;
>  
>     assert(mt->surf.samples == 1);
>  
> +   if (mt->needs_fake_etc) {
> +      if (!(mode & BRW_MAP_ETC_BIT)) {
> +         assert(mt->shadow_mt);
> +
> +         mt->is_shadow_mapped = true;
> +
> +         mt->shadow_needs_update = false;
> +         mt = miptree->shadow_mt;
> +      } else {
> +         mt->is_shadow_mapped = false;
> +
> +         /*
> +          * We need to update the shadow miptree every time we invalidate the
> +          * main miptree or we map it for writing.
> +          */
> +         if (mode & GL_MAP_WRITE_BIT || mode & GL_MAP_INVALIDATE_RANGE_BIT) {
> +            mt->shadow_needs_update = true;

This won't catch all the cases in which the shadow needs updating.
You'll need to update intel_miptree_finish_write() for that. I have a
similar patch here: https://patchwork.freedesktop.org/patch/253199/

> +         }
> +      }
> +   }
> +
>     map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
>     if (!map){
>        *out_ptr = NULL;
> @@ -3726,7 +3770,7 @@ intel_miptree_map(struct brw_context *brw,
>     if (mt->format == MESA_FORMAT_S_UINT8) {
>        intel_miptree_map_s8(brw, mt, map, level, slice);
>     } else if (mt->etc_format != MESA_FORMAT_NONE &&
> -              !(mode & BRW_MAP_DIRECT_BIT)) {
> +              !(mode & BRW_MAP_DIRECT_BIT) && !mt->needs_fake_etc) {
>        intel_miptree_map_etc(brw, mt, map, level, slice);
>     } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
>        intel_miptree_map_depthstencil(brw, mt, map, level, slice);
> @@ -3755,10 +3799,21 @@ intel_miptree_map(struct brw_context *brw,
>  
>  void
>  intel_miptree_unmap(struct brw_context *brw,
> -                    struct intel_mipmap_tree *mt,
> +                    struct intel_mipmap_tree *miptree,
>                      unsigned int level,
>                      unsigned int slice)
>  {
> +   struct intel_mipmap_tree *mt = miptree;
> +
> +   if (miptree->needs_fake_etc) {
> +      if (miptree->is_shadow_mapped) {
> +         assert(miptree->shadow_mt);
> +
> +         miptree->is_shadow_mapped = false;
> +         mt = miptree->shadow_mt;
> +      }
> +   }
> +

Instead of mapping the shadow for writes, I think it'd be simpler to
always give access to the main miptree when mapping, then decompress the
data into the shadow during the unmap operation.

>     struct intel_miptree_map *map = mt->level[level].slice[slice].map;
>  
>     assert(mt->surf.samples == 1);
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index b955a2bab1..aca45cfaa4 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -74,6 +74,7 @@ struct intel_texture_image;
>   * without transcoding back.  This flag to intel_miptree_map() gets you that.
>   */
>  #define BRW_MAP_DIRECT_BIT	0x80000000
> +#define BRW_MAP_ETC_BIT 0x40000000
>  
>  struct intel_miptree_map {
>     /** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */
> @@ -305,6 +306,22 @@ struct intel_mipmap_tree
>     struct intel_mipmap_tree *shadow_mt;
>     bool shadow_needs_update;
>  
> +   /**
> +	 * \brief Indicates that the shadow miptree needs to be updated
> +    *
> +    * It's necessary when we map the shadow tree to fake the ETC2 compression
> +    * so that we know that we have to unmap the shadow and not the main.\n
> +    */
> +   bool is_shadow_mapped;
> +
> +   /**
> +    * \brief Indicates that we fake the ETC2 compression support
> +    *
> +    * GPUs Gen < 8 don't support sampling and rendering of ETC2 formats so
> +    * we need to fake it. This variable is set to true when we fake it.
> +    */
> +   bool needs_fake_etc;
> +

Let's make a function to detect needs_fake_etc instead of adding to the data
structure. That'd be easier to follow.

-Nanley

>     /**
>      * \brief CCS, MCS, or HiZ auxiliary buffer.
>      *
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
> index bdcdb7736e..a8abb69118 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
> @@ -903,6 +903,43 @@ flush_astc_denorms(struct gl_context *ctx, GLuint dims,
>     }
>  }
>  
> +static void intel_compressed_texsubimage(struct gl_context *ctx,
> +                                         GLuint dims,
> +                                         struct gl_texture_image *texImage,
> +                                         GLint xoffset, GLint yoffset,
> +                                         GLint zoffset, GLsizei width,
> +                                         GLsizei height, GLsizei depth,
> +                                         GLenum format, GLsizei imageSize,
> +                                         const GLvoid *data)
> +{
> +   GLbitfield mode = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT;
> +   struct compressed_pixelstore store;
> +
> +   if (dims == 1) {
> +      _mesa_problem(ctx, "Unexpected 1D compressed texsubimage call");
> +      return;
> +   }
> +
> +   _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat,
> +                                       width, height, depth,
> +                                       &ctx->Unpack, &store);
> +
> +   data = _mesa_validate_pbo_compressed_teximage(ctx, dims, imageSize,
> +                                                 data, &ctx->Unpack,
> +                                                 "glCompressedTexSubImage");
> +   if (!data)
> +      return;
> +
> +   GLbitfield etc_mode = mode;
> +   if (intel_texture_image(texImage)->mt->needs_fake_etc)
> +      etc_mode |= BRW_MAP_ETC_BIT;
> +
> +   _mesa_upload_compressed_texsubimage(ctx, dims, &store, texImage,
> +                                       xoffset, yoffset, zoffset, width,
> +                                       height, depth, etc_mode, data);
> +
> +   _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack);
> +}
>  
>  static void
>  intelCompressedTexSubImage(struct gl_context *ctx, GLuint dims,
> @@ -913,10 +950,10 @@ intelCompressedTexSubImage(struct gl_context *ctx, GLuint dims,
>                          GLsizei imageSize, const GLvoid *data)
>  {
>     /* Upload the compressed data blocks */
> -   _mesa_store_compressed_texsubimage(ctx, dims, texImage,
> -                                      xoffset, yoffset, zoffset,
> -                                      width, height, depth,
> -                                      format, imageSize, data);
> +   intel_compressed_texsubimage(ctx, dims, texImage,
> +                                xoffset, yoffset, zoffset,
> +                                width, height, depth,
> +                                format, imageSize, data);
>  
>     /* Fix up copied ASTC blocks if necessary */
>     GLenum gl_format = _mesa_compressed_format_to_glenum(ctx,
> diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
> index 2913d4bc06..38043f5dba 100644
> --- a/src/mesa/main/texstore.c
> +++ b/src/mesa/main/texstore.c
> @@ -1320,6 +1320,54 @@ _mesa_compute_compressed_pixelstore(GLuint dims, mesa_format texFormat,
>     }
>  }
>  
> +void
> +_mesa_upload_compressed_texsubimage(struct gl_context *ctx, GLuint dims,
> +                                    struct compressed_pixelstore *store,
> +                                    struct gl_texture_image *texImage,
> +                                    GLint xoffset, GLint yoffset,
> +                                    GLint zoffset, GLsizei width,
> +                                    GLsizei height, GLsizei depth,
> +                                    GLbitfield mode, const GLvoid *data)
> +{
> +   GLubyte *dstMap = NULL;
> +   GLint dstRowStride;
> +   GLint i, slice;
> +   const GLubyte *src = (const GLubyte *) data + store->SkipBytes;
> +
> +   for (slice = 0; slice < store->CopySlices; slice++) {
> +      /* Map dest texture buffer */
> +      ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset,
> +                                  xoffset, yoffset, width, height,
> +                                  mode,
> +                                  &dstMap, &dstRowStride);
> +
> +      if (dstMap) {
> +         /* copy rows of blocks */
> +         if (dstRowStride == store->TotalBytesPerRow &&
> +             dstRowStride == store->CopyBytesPerRow) {
> +            memcpy(dstMap, src, store->CopyBytesPerRow * store->CopyRowsPerSlice);
> +            src += store->CopyBytesPerRow * store->CopyRowsPerSlice;
> +         }
> +         else {
> +            for (i = 0; i < store->CopyRowsPerSlice; i++) {
> +               memcpy(dstMap, src, store->CopyBytesPerRow);
> +               dstMap += dstRowStride;
> +               src += store->TotalBytesPerRow;
> +            }
> +         }
> +
> +         ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset);
> +
> +         /* advance to next slice */
> +         src += store->TotalBytesPerRow * (store->TotalRowsPerSlice
> +                                          - store->CopyRowsPerSlice);
> +      }
> +      else {
> +         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage%uD",
> +                     dims);
> +      }
> +   }
> +}
>  
>  /**
>   * Fallback for Driver.CompressedTexSubImage()
> @@ -1333,10 +1381,7 @@ _mesa_store_compressed_texsubimage(struct gl_context *ctx, GLuint dims,
>                                     GLsizei imageSize, const GLvoid *data)
>  {
>     struct compressed_pixelstore store;
> -   GLint dstRowStride;
> -   GLint i, slice;
> -   GLubyte *dstMap;
> -   const GLubyte *src;
> +   GLbitfield mode = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT;
>  
>     if (dims == 1) {
>        _mesa_problem(ctx, "Unexpected 1D compressed texsubimage call");
> @@ -1354,42 +1399,9 @@ _mesa_store_compressed_texsubimage(struct gl_context *ctx, GLuint dims,
>     if (!data)
>        return;
>  
> -   src = (const GLubyte *) data + store.SkipBytes;
> -
> -   for (slice = 0; slice < store.CopySlices; slice++) {
> -      /* Map dest texture buffer */
> -      ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset,
> -                                  xoffset, yoffset, width, height,
> -                                  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
> -                                  &dstMap, &dstRowStride);
> -
> -      if (dstMap) {
> -
> -         /* copy rows of blocks */
> -         if (dstRowStride == store.TotalBytesPerRow &&
> -             dstRowStride == store.CopyBytesPerRow) {
> -            memcpy(dstMap, src, store.CopyBytesPerRow * store.CopyRowsPerSlice);
> -            src += store.CopyBytesPerRow * store.CopyRowsPerSlice;
> -         }
> -         else {
> -            for (i = 0; i < store.CopyRowsPerSlice; i++) {
> -               memcpy(dstMap, src, store.CopyBytesPerRow);
> -               dstMap += dstRowStride;
> -               src += store.TotalBytesPerRow;
> -            }
> -         }
> -
> -         ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset);
> -
> -         /* advance to next slice */
> -         src += store.TotalBytesPerRow * (store.TotalRowsPerSlice
> -                                          - store.CopyRowsPerSlice);
> -      }
> -      else {
> -         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage%uD",
> -                     dims);
> -      }
> -   }
> +   _mesa_upload_compressed_texsubimage(ctx, dims, &store, texImage,
> +                                       xoffset, yoffset, zoffset, width,
> +                                       height, depth, mode, data);
>  
>     _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack);
>  }
> diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h
> index 2fef7ba7d7..c70ac77972 100644
> --- a/src/mesa/main/texstore.h
> +++ b/src/mesa/main/texstore.h
> @@ -158,6 +158,15 @@ struct compressed_pixelstore {
>     int CopySlices;
>  };
>  
> +extern void
> +_mesa_upload_compressed_texsubimage(struct gl_context *ctx,
> +                                    GLuint dims,
> +                                    struct compressed_pixelstore *store,
> +                                    struct gl_texture_image *texImage,
> +                                    GLint xoffset, GLint yoffset,
> +                                    GLint zoffset, GLsizei width,
> +                                    GLsizei height, GLsizei depth,
> +                                    GLbitfield mode, const GLvoid *data);
>  
>  extern void
>  _mesa_compute_compressed_pixelstore(GLuint dims, mesa_format texFormat,
> -- 
> 2.19.0
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list