[Mesa-dev] [PATCH 3/8] i965: Faking the ETC2 compression on Gen < 8 GPUs using two miptrees.
Nanley Chery
nanleychery at gmail.com
Fri Jan 18 23:32:02 UTC 2019
On Mon, Nov 19, 2018 at 10:54:07AM +0200, Eleni Maria Stea wrote:
> GPUs Gen < 8 cannot render ETC2 formats. So far, they converted the
> compressed EAC/ETC2 images to non-compressed RGB format images that they
> can render. When GetCompressed* functions were called, the pixels were
> returned in the RGB format and not the compressed format as expected.
>
> Trying to fix this problem, we use the shadow miptree to store the
> decompressed data for the rendering and the main miptree to store the
> compressed. We use the BRW_MAP_ETC_BIT as a flag to indicate when we
> use the fake compression in order to map the main tree with the
> compressed data. The functions that upload the compressed data as well
> as the mapping/unmapping functions are now updated to use this flag.
Did you mean sample instead of render?
> ---
> .../drivers/dri/i965/brw_wm_surface_state.c | 26 +++++-
> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 73 +++++++++++++--
> src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 17 ++++
> src/mesa/drivers/dri/i965/intel_tex_image.c | 45 ++++++++-
> src/mesa/main/texstore.c | 92 +++++++++++--------
> src/mesa/main/texstore.h | 9 ++
> 6 files changed, 204 insertions(+), 58 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index e214fae140..4d1eafac91 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -329,6 +329,17 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
> {
> const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
>
> + struct brw_context *brw = brw_context((struct gl_context *)ctx);
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> + bool is_fake_etc = _mesa_is_format_etc2(img->TexFormat) &&
> + devinfo->gen < 8;
> +
> + mesa_format format;
> + if (is_fake_etc)
> + format = intel_lower_compressed_format(brw, img->TexFormat);
> + else
> + format = img->TexFormat;
> +
Why is modifying this function necessary?
> int swizzles[SWIZZLE_NIL + 1] = {
> SWIZZLE_X,
> SWIZZLE_Y,
> @@ -381,7 +392,7 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
> }
> }
>
> - GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
> + GLenum datatype = _mesa_get_format_datatype(format);
>
> /* If the texture's format is alpha-only, force R, G, and B to
> * 0.0. Similarly, if the texture's format has no alpha channel,
> @@ -422,9 +433,9 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
> case GL_RED:
> case GL_RG:
> case GL_RGB:
> - if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
> - img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
> - img->TexFormat == MESA_FORMAT_SRGB_DXT1)
> + if (_mesa_get_format_bits(format, GL_ALPHA_BITS) > 0 ||
> + format == MESA_FORMAT_RGB_DXT1 ||
> + format == MESA_FORMAT_SRGB_DXT1)
> swizzles[3] = SWIZZLE_ONE;
> break;
> }
> @@ -474,6 +485,11 @@ static void brw_update_texture_surface(struct gl_context *ctx,
> struct intel_texture_object *intel_obj = intel_texture_object(obj);
> struct intel_mipmap_tree *mt = intel_obj->mt;
>
> + if (mt->needs_fake_etc) {
> + assert(mt->shadow_mt);
> + mt = mt->shadow_mt;
> + }
> +
> if (plane > 0) {
> if (mt->plane[plane - 1] == NULL)
> return;
> @@ -512,7 +528,7 @@ static void brw_update_texture_surface(struct gl_context *ctx,
> * is safe because texture views aren't allowed on depth/stencil.
> */
> mesa_fmt = mt->format;
> - } else if (mt->etc_format != MESA_FORMAT_NONE) {
> + } else if (intel_obj->mt->etc_format != MESA_FORMAT_NONE) {
> mesa_fmt = mt->format;
For uniformity, lets access mt->shadow_mt->format here and move the
mt->needs_fake_etc check from above to below this condition:
} else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
> } else if (plane > 0) {
> mesa_fmt = mt->format;
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 0e67e4d8f3..b24332ff67 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -689,6 +689,8 @@ miptree_create(struct brw_context *brw,
> if (devinfo->gen < 6 && _mesa_is_format_color_format(format))
> tiling_flags &= ~ISL_TILING_Y0_BIT;
>
> + bool fakes_etc_compression = devinfo->gen < 8 && _mesa_is_format_etc2(format);
> +
> mesa_format mt_fmt;
> if (_mesa_is_format_color_format(format)) {
> mt_fmt = intel_lower_compressed_format(brw, format);
Why not reserve calling intel_lower_compressed_format() for the case in
which we're creating a fake ETC miptree? Leaving this makes things
unnecessarily complex.
> @@ -700,18 +702,41 @@ miptree_create(struct brw_context *brw,
> intel_depth_format_for_depthstencil_format(format);
> }
>
> + mesa_format fmt = fakes_etc_compression ? format : mt_fmt;
> struct intel_mipmap_tree *mt =
> - make_surface(brw, target, mt_fmt, first_level, last_level,
> + make_surface(brw, target, fmt, first_level, last_level,
> width0, height0, depth0, num_samples,
> - tiling_flags, mt_surf_usage(mt_fmt),
> + tiling_flags, mt_surf_usage(fmt),
> alloc_flags, 0, NULL);
>
> if (mt == NULL)
> return NULL;
>
> + mt->needs_fake_etc = fakes_etc_compression;
> + if (mt->needs_fake_etc) {
> + mt->etc_format = format;
> + mt->shadow_mt = make_surface(brw, target, mt_fmt, first_level,
> + last_level, width0, height0, depth0,
> + num_samples, tiling_flags,
> + mt_surf_usage(mt_fmt),
> + alloc_flags, 0, NULL);
> + if (mt->shadow_mt == NULL) {
> + intel_miptree_release(&mt);
> + return NULL;
> + }
> +
> + mt->shadow_mt->etc_format = MESA_FORMAT_NONE;
> + if (!(flags & MIPTREE_CREATE_NO_AUX))
> + intel_miptree_choose_aux_usage(brw, mt->shadow_mt);
I don't think we'll use an auxiliary buffer with this miptree.
> + } else {
> + mt->etc_format = (_mesa_is_format_color_format(format) &&
> + mt_fmt != format) ? format : MESA_FORMAT_NONE;
Won't this always resolve to MESA_FORMAT_NONE?
> + }
> +
> if (needs_separate_stencil(brw, mt, format)) {
> mt->stencil_mt =
> - make_surface(brw, target, MESA_FORMAT_S_UINT8, first_level, last_level,
> + make_surface(brw, target, MESA_FORMAT_S_UINT8, first_level,
> + last_level,
This change is unrelated.
> width0, height0, depth0, num_samples,
> ISL_TILING_W_BIT, mt_surf_usage(MESA_FORMAT_S_UINT8),
> alloc_flags, 0, NULL);
> @@ -721,9 +746,6 @@ miptree_create(struct brw_context *brw,
> }
> }
>
> - mt->etc_format = (_mesa_is_format_color_format(format) && mt_fmt != format) ?
> - format : MESA_FORMAT_NONE;
> -
> if (!(flags & MIPTREE_CREATE_NO_AUX))
> intel_miptree_choose_aux_usage(brw, mt);
>
> @@ -3700,7 +3722,7 @@ use_intel_mipree_map_blit(struct brw_context *brw,
> */
> void
> intel_miptree_map(struct brw_context *brw,
> - struct intel_mipmap_tree *mt,
> + struct intel_mipmap_tree *miptree,
> unsigned int level,
> unsigned int slice,
> unsigned int x,
> @@ -3713,9 +3735,31 @@ intel_miptree_map(struct brw_context *brw,
> {
> const struct gen_device_info *devinfo = &brw->screen->devinfo;
> struct intel_miptree_map *map;
> + struct intel_mipmap_tree *mt = miptree;
>
> assert(mt->surf.samples == 1);
>
> + if (mt->needs_fake_etc) {
> + if (!(mode & BRW_MAP_ETC_BIT)) {
> + assert(mt->shadow_mt);
> +
> + mt->is_shadow_mapped = true;
> +
> + mt->shadow_needs_update = false;
> + mt = miptree->shadow_mt;
> + } else {
> + mt->is_shadow_mapped = false;
> +
> + /*
> + * We need to update the shadow miptree every time we invalidate the
> + * main miptree or we map it for writing.
> + */
> + if (mode & GL_MAP_WRITE_BIT || mode & GL_MAP_INVALIDATE_RANGE_BIT) {
> + mt->shadow_needs_update = true;
This won't catch all the cases in which the shadow needs updating.
You'll need to update intel_miptree_finish_write() for that. I have a
similar patch here: https://patchwork.freedesktop.org/patch/253199/
> + }
> + }
> + }
> +
> map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
> if (!map){
> *out_ptr = NULL;
> @@ -3726,7 +3770,7 @@ intel_miptree_map(struct brw_context *brw,
> if (mt->format == MESA_FORMAT_S_UINT8) {
> intel_miptree_map_s8(brw, mt, map, level, slice);
> } else if (mt->etc_format != MESA_FORMAT_NONE &&
> - !(mode & BRW_MAP_DIRECT_BIT)) {
> + !(mode & BRW_MAP_DIRECT_BIT) && !mt->needs_fake_etc) {
> intel_miptree_map_etc(brw, mt, map, level, slice);
> } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
> intel_miptree_map_depthstencil(brw, mt, map, level, slice);
> @@ -3755,10 +3799,21 @@ intel_miptree_map(struct brw_context *brw,
>
> void
> intel_miptree_unmap(struct brw_context *brw,
> - struct intel_mipmap_tree *mt,
> + struct intel_mipmap_tree *miptree,
> unsigned int level,
> unsigned int slice)
> {
> + struct intel_mipmap_tree *mt = miptree;
> +
> + if (miptree->needs_fake_etc) {
> + if (miptree->is_shadow_mapped) {
> + assert(miptree->shadow_mt);
> +
> + miptree->is_shadow_mapped = false;
> + mt = miptree->shadow_mt;
> + }
> + }
> +
Instead of mapping the shadow for writes, I think it'd be simpler to
always give access to the main miptree when mapping, then decompress the
data into the shadow during the unmap operation.
> struct intel_miptree_map *map = mt->level[level].slice[slice].map;
>
> assert(mt->surf.samples == 1);
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index b955a2bab1..aca45cfaa4 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -74,6 +74,7 @@ struct intel_texture_image;
> * without transcoding back. This flag to intel_miptree_map() gets you that.
> */
> #define BRW_MAP_DIRECT_BIT 0x80000000
> +#define BRW_MAP_ETC_BIT 0x40000000
>
> struct intel_miptree_map {
> /** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */
> @@ -305,6 +306,22 @@ struct intel_mipmap_tree
> struct intel_mipmap_tree *shadow_mt;
> bool shadow_needs_update;
>
> + /**
> + * \brief Indicates that the shadow miptree needs to be updated
> + *
> + * It's necessary when we map the shadow tree to fake the ETC2 compression
> + * so that we know that we have to unmap the shadow and not the main.\n
> + */
> + bool is_shadow_mapped;
> +
> + /**
> + * \brief Indicates that we fake the ETC2 compression support
> + *
> + * GPUs Gen < 8 don't support sampling and rendering of ETC2 formats so
> + * we need to fake it. This variable is set to true when we fake it.
> + */
> + bool needs_fake_etc;
> +
Let's make a function to detect needs_fake_etc instead of adding to the data
structure. That'd be easier to follow.
-Nanley
> /**
> * \brief CCS, MCS, or HiZ auxiliary buffer.
> *
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
> index bdcdb7736e..a8abb69118 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
> @@ -903,6 +903,43 @@ flush_astc_denorms(struct gl_context *ctx, GLuint dims,
> }
> }
>
> +static void intel_compressed_texsubimage(struct gl_context *ctx,
> + GLuint dims,
> + struct gl_texture_image *texImage,
> + GLint xoffset, GLint yoffset,
> + GLint zoffset, GLsizei width,
> + GLsizei height, GLsizei depth,
> + GLenum format, GLsizei imageSize,
> + const GLvoid *data)
> +{
> + GLbitfield mode = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT;
> + struct compressed_pixelstore store;
> +
> + if (dims == 1) {
> + _mesa_problem(ctx, "Unexpected 1D compressed texsubimage call");
> + return;
> + }
> +
> + _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat,
> + width, height, depth,
> + &ctx->Unpack, &store);
> +
> + data = _mesa_validate_pbo_compressed_teximage(ctx, dims, imageSize,
> + data, &ctx->Unpack,
> + "glCompressedTexSubImage");
> + if (!data)
> + return;
> +
> + GLbitfield etc_mode = mode;
> + if (intel_texture_image(texImage)->mt->needs_fake_etc)
> + etc_mode |= BRW_MAP_ETC_BIT;
> +
> + _mesa_upload_compressed_texsubimage(ctx, dims, &store, texImage,
> + xoffset, yoffset, zoffset, width,
> + height, depth, etc_mode, data);
> +
> + _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack);
> +}
>
> static void
> intelCompressedTexSubImage(struct gl_context *ctx, GLuint dims,
> @@ -913,10 +950,10 @@ intelCompressedTexSubImage(struct gl_context *ctx, GLuint dims,
> GLsizei imageSize, const GLvoid *data)
> {
> /* Upload the compressed data blocks */
> - _mesa_store_compressed_texsubimage(ctx, dims, texImage,
> - xoffset, yoffset, zoffset,
> - width, height, depth,
> - format, imageSize, data);
> + intel_compressed_texsubimage(ctx, dims, texImage,
> + xoffset, yoffset, zoffset,
> + width, height, depth,
> + format, imageSize, data);
>
> /* Fix up copied ASTC blocks if necessary */
> GLenum gl_format = _mesa_compressed_format_to_glenum(ctx,
> diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
> index 2913d4bc06..38043f5dba 100644
> --- a/src/mesa/main/texstore.c
> +++ b/src/mesa/main/texstore.c
> @@ -1320,6 +1320,54 @@ _mesa_compute_compressed_pixelstore(GLuint dims, mesa_format texFormat,
> }
> }
>
> +void
> +_mesa_upload_compressed_texsubimage(struct gl_context *ctx, GLuint dims,
> + struct compressed_pixelstore *store,
> + struct gl_texture_image *texImage,
> + GLint xoffset, GLint yoffset,
> + GLint zoffset, GLsizei width,
> + GLsizei height, GLsizei depth,
> + GLbitfield mode, const GLvoid *data)
> +{
> + GLubyte *dstMap = NULL;
> + GLint dstRowStride;
> + GLint i, slice;
> + const GLubyte *src = (const GLubyte *) data + store->SkipBytes;
> +
> + for (slice = 0; slice < store->CopySlices; slice++) {
> + /* Map dest texture buffer */
> + ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset,
> + xoffset, yoffset, width, height,
> + mode,
> + &dstMap, &dstRowStride);
> +
> + if (dstMap) {
> + /* copy rows of blocks */
> + if (dstRowStride == store->TotalBytesPerRow &&
> + dstRowStride == store->CopyBytesPerRow) {
> + memcpy(dstMap, src, store->CopyBytesPerRow * store->CopyRowsPerSlice);
> + src += store->CopyBytesPerRow * store->CopyRowsPerSlice;
> + }
> + else {
> + for (i = 0; i < store->CopyRowsPerSlice; i++) {
> + memcpy(dstMap, src, store->CopyBytesPerRow);
> + dstMap += dstRowStride;
> + src += store->TotalBytesPerRow;
> + }
> + }
> +
> + ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset);
> +
> + /* advance to next slice */
> + src += store->TotalBytesPerRow * (store->TotalRowsPerSlice
> + - store->CopyRowsPerSlice);
> + }
> + else {
> + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage%uD",
> + dims);
> + }
> + }
> +}
>
> /**
> * Fallback for Driver.CompressedTexSubImage()
> @@ -1333,10 +1381,7 @@ _mesa_store_compressed_texsubimage(struct gl_context *ctx, GLuint dims,
> GLsizei imageSize, const GLvoid *data)
> {
> struct compressed_pixelstore store;
> - GLint dstRowStride;
> - GLint i, slice;
> - GLubyte *dstMap;
> - const GLubyte *src;
> + GLbitfield mode = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT;
>
> if (dims == 1) {
> _mesa_problem(ctx, "Unexpected 1D compressed texsubimage call");
> @@ -1354,42 +1399,9 @@ _mesa_store_compressed_texsubimage(struct gl_context *ctx, GLuint dims,
> if (!data)
> return;
>
> - src = (const GLubyte *) data + store.SkipBytes;
> -
> - for (slice = 0; slice < store.CopySlices; slice++) {
> - /* Map dest texture buffer */
> - ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset,
> - xoffset, yoffset, width, height,
> - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
> - &dstMap, &dstRowStride);
> -
> - if (dstMap) {
> -
> - /* copy rows of blocks */
> - if (dstRowStride == store.TotalBytesPerRow &&
> - dstRowStride == store.CopyBytesPerRow) {
> - memcpy(dstMap, src, store.CopyBytesPerRow * store.CopyRowsPerSlice);
> - src += store.CopyBytesPerRow * store.CopyRowsPerSlice;
> - }
> - else {
> - for (i = 0; i < store.CopyRowsPerSlice; i++) {
> - memcpy(dstMap, src, store.CopyBytesPerRow);
> - dstMap += dstRowStride;
> - src += store.TotalBytesPerRow;
> - }
> - }
> -
> - ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset);
> -
> - /* advance to next slice */
> - src += store.TotalBytesPerRow * (store.TotalRowsPerSlice
> - - store.CopyRowsPerSlice);
> - }
> - else {
> - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage%uD",
> - dims);
> - }
> - }
> + _mesa_upload_compressed_texsubimage(ctx, dims, &store, texImage,
> + xoffset, yoffset, zoffset, width,
> + height, depth, mode, data);
>
> _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack);
> }
> diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h
> index 2fef7ba7d7..c70ac77972 100644
> --- a/src/mesa/main/texstore.h
> +++ b/src/mesa/main/texstore.h
> @@ -158,6 +158,15 @@ struct compressed_pixelstore {
> int CopySlices;
> };
>
> +extern void
> +_mesa_upload_compressed_texsubimage(struct gl_context *ctx,
> + GLuint dims,
> + struct compressed_pixelstore *store,
> + struct gl_texture_image *texImage,
> + GLint xoffset, GLint yoffset,
> + GLint zoffset, GLsizei width,
> + GLsizei height, GLsizei depth,
> + GLbitfield mode, const GLvoid *data);
>
> extern void
> _mesa_compute_compressed_pixelstore(GLuint dims, mesa_format texFormat,
> --
> 2.19.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list