Mesa (main): radv: Add ETC2 decode shader.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Dec 14 12:10:38 UTC 2021


Module: Mesa
Branch: main
Commit: 1153db23f58c39af2472e3bdfa721879ca1ac8cd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1153db23f58c39af2472e3bdfa721879ca1ac8cd

Author: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Date:   Thu Nov  4 00:49:45 2021 +0100

radv: Add ETC2 decode shader.

To make sure that apps actually get something when the HW doesn't
support ETC2. To do that we decompress after every copy operation.

Includes a quite complicated decode shader. It is not bit-to-bit
equivalent to AMD APUs that support ETC2, but close enough to
pass CTS. Likely missing bits are related to the R11 and R11G11
formats where we decode to 16 bits but likely do the extension
differently.

Acked-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14071>

---

 src/amd/vulkan/meson.build            |   1 +
 src/amd/vulkan/radv_meta.c            |   7 +
 src/amd/vulkan/radv_meta.h            |   7 +
 src/amd/vulkan/radv_meta_copy.c       |  30 ++
 src/amd/vulkan/radv_meta_etc_decode.c | 837 ++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h         |   6 +
 6 files changed, 888 insertions(+)

diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 2e365b7a192..92d3c4bc843 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -62,6 +62,7 @@ libradv_files = files(
   'radv_meta_copy_vrs_htile.c',
   'radv_meta_dcc_retile.c',
   'radv_meta_decompress.c',
+  'radv_meta_etc_decode.c',
   'radv_meta_fast_clear.c',
   'radv_meta_fmask_copy.c',
   'radv_meta_fmask_expand.c',
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index 8ee6ef4c60b..e49a414e272 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -495,8 +495,14 @@ radv_device_init_meta(struct radv_device *device)
    if (result != VK_SUCCESS)
       goto fail_fmask_copy;
 
+   result = radv_device_init_meta_etc_decode_state(device, on_demand);
+   if (result != VK_SUCCESS)
+      goto fail_etc_decode;
+
    return VK_SUCCESS;
 
+fail_etc_decode:
+   radv_device_finish_meta_fmask_copy_state(device);
 fail_fmask_copy:
    radv_device_finish_accel_struct_build_state(device);
 fail_accel_struct_build:
@@ -532,6 +538,7 @@ fail_clear:
 void
 radv_device_finish_meta(struct radv_device *device)
 {
+   radv_device_finish_meta_etc_decode_state(device);
    radv_device_finish_accel_struct_build_state(device);
    radv_device_finish_meta_clear_state(device);
    radv_device_finish_meta_resolve_state(device);
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index 60d6d2f1725..5a74ab72a91 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -143,6 +143,9 @@ void radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device);
 VkResult radv_device_init_accel_struct_build_state(struct radv_device *device);
 void radv_device_finish_accel_struct_build_state(struct radv_device *device);
 
+VkResult radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand);
+void radv_device_finish_meta_etc_decode_state(struct radv_device *device);
+
 void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,
                     uint32_t flags);
 
@@ -261,6 +264,10 @@ uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_
 void radv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data,
                            uint64_t size);
 
+void radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+                          VkImageLayout layout, const VkImageSubresourceLayers *subresource,
+                          VkOffset3D offset, VkExtent3D extent);
+
 /**
  * Return whether the bound pipeline is the FMASK decompress pass.
  */
diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c
index aed65196c3b..51a93f67ecd 100644
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -256,6 +256,21 @@ radv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
                            pCopyBufferToImageInfo->dstImageLayout,
                            &pCopyBufferToImageInfo->pRegions[r]);
    }
+
+   if (cmd_buffer->device->physical_device->emulate_etc2 &&
+       vk_format_description(dst_image->vk_format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
+      cmd_buffer->state.flush_bits |=
+         RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+         radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |
+         radv_dst_access_flush(
+            cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image);
+      for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
+         radv_meta_decode_etc(cmd_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout,
+                              &pCopyBufferToImageInfo->pRegions[r].imageSubresource,
+                              pCopyBufferToImageInfo->pRegions[r].imageOffset,
+                              pCopyBufferToImageInfo->pRegions[r].imageExtent);
+      }
+   }
 }
 
 static void
@@ -592,4 +607,19 @@ radv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, const VkCopyImageInfo2KHR *
       copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image,
                  pCopyImageInfo->dstImageLayout, &pCopyImageInfo->pRegions[r]);
    }
+
+   if (cmd_buffer->device->physical_device->emulate_etc2 &&
+       vk_format_description(dst_image->vk_format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
+      cmd_buffer->state.flush_bits |=
+         RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+         radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |
+         radv_dst_access_flush(
+            cmd_buffer, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, dst_image);
+      for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
+         radv_meta_decode_etc(cmd_buffer, dst_image, pCopyImageInfo->dstImageLayout,
+                              &pCopyImageInfo->pRegions[r].dstSubresource,
+                              pCopyImageInfo->pRegions[r].dstOffset,
+                              pCopyImageInfo->pRegions[r].extent);
+      }
+   }
 }
diff --git a/src/amd/vulkan/radv_meta_etc_decode.c b/src/amd/vulkan/radv_meta_etc_decode.c
new file mode 100644
index 00000000000..c32113a8be9
--- /dev/null
+++ b/src/amd/vulkan/radv_meta_etc_decode.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright © 2021 Google
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include "nir/nir_builder.h"
+#include "radv_meta.h"
+#include "radv_private.h"
+#include "sid.h"
+#include "vk_format.h"
+
+/* Based on
+ * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/etc2.comp
+ * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/eac.comp
+ *
+ * With some differences:
+ *  - Use the vk format to do all the settings.
+ *  - Combine the ETC2 and EAC shaders.
+ *  - Since we combined the above, reuse the function for the ETC2 A8 component.
+ *  - the EAC shader doesn't do SNORM correctly, so this has that fixed.
+ */
+
+/* Byte-swap each 32-bit channel of src (cnt channels, 1 or 2).
+ *
+ * The 64-bit ETC2/EAC block payload is stored with big-endian byte order per
+ * 32-bit word, so every word is byte-reversed before any bitfield extraction.
+ * Returns a scalar when cnt == 1, otherwise a cnt-component vector.
+ */
+static nir_ssa_def *
+flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt)
+{
+   nir_ssa_def *v[2];
+   for (unsigned i = 0; i < cnt; ++i) {
+      nir_ssa_def *intermediate[4];
+      nir_ssa_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
+      /* Extract the four bytes ... */
+      for (unsigned j = 0; j < 4; ++j)
+         intermediate[j] = nir_ubfe(b, chan, nir_imm_int(b, 8 * j), nir_imm_int(b, 8));
+      /* ... and reassemble them in reversed order. */
+      v[i] = nir_ior(b,
+                     nir_ior(b, nir_ishl(b, intermediate[0], nir_imm_int(b, 24)),
+                             nir_ishl(b, intermediate[1], nir_imm_int(b, 16))),
+                     nir_ior(b, nir_ishl(b, intermediate[2], nir_imm_int(b, 8)),
+                             nir_ishl(b, intermediate[3], nir_imm_int(b, 0))));
+   }
+   return cnt == 1 ? v[0] : nir_vec(b, v, cnt);
+}
+
+/* ETC1 intensity modifier table lookup, emitted as a bcsel chain.
+ *
+ * x selects one of the 8 table rows (codeword), y selects the column:
+ * y == 1 picks the large modifier, otherwise the small one. Values match
+ * the ETC1 base color modifier table from the Khronos data format spec.
+ */
+static nir_ssa_def *
+etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   const unsigned table[8][2] = {{2, 8},   {5, 17},  {9, 29},   {13, 42},
+                                 {18, 60}, {24, 80}, {33, 106}, {47, 183}};
+   nir_ssa_def *upper = nir_ieq(b, y, nir_imm_int(b, 1));
+   nir_ssa_def *result = NULL;
+   /* Build a select chain: entry 0 is the default, later entries override. */
+   for (unsigned i = 0; i < 8; ++i) {
+      nir_ssa_def *tmp =
+         nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
+      if (result)
+         result = nir_bcsel(b, nir_ieq(b, x, nir_imm_int(b, i)), tmp, result);
+      else
+         result = tmp;
+   }
+   return result;
+}
+
+/* ETC2 T/H-mode distance table lookup (8 entries), emitted as a bcsel chain
+ * indexed by x. Entry 0 serves as the default value of the chain.
+ */
+static nir_ssa_def *
+etc2_distance_lookup(nir_builder *b, nir_ssa_def *x)
+{
+   const unsigned table[8] = {3, 6, 11, 16, 23, 32, 41, 64};
+   nir_ssa_def *result = NULL;
+   for (unsigned i = 0; i < 8; ++i) {
+      if (result)
+         result = nir_bcsel(b, nir_ieq(b, x, nir_imm_int(b, i)), nir_imm_int(b, table[i]), result);
+      else
+         result = nir_imm_int(b, table[i]);
+   }
+   return result;
+}
+
+/* EAC/ETC2 alpha modifier table lookup.
+ *
+ * Each 16-bit table entry packs four 4-bit modifier magnitudes (columns 3..0,
+ * high nibble first). x selects the row (the 4-bit table codeword from the
+ * block), y selects the column; the nibble is extracted with ubfe at y * 4.
+ */
+static nir_ssa_def *
+etc1_alpha_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   const unsigned table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742,
+                               0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642};
+   nir_ssa_def *result = NULL;
+   /* Select the packed row by x (bcsel chain, entry 0 is the default)... */
+   for (unsigned i = 0; i < 16; ++i) {
+      nir_ssa_def *tmp = nir_imm_int(b, table[i]);
+      if (result)
+         result = nir_bcsel(b, nir_ieq(b, x, nir_imm_int(b, i)), tmp, result);
+      else
+         result = tmp;
+   }
+   /* ...then pull out the 4-bit column selected by y. */
+   return nir_ubfe(b, result, nir_imul(b, y, nir_imm_int(b, 4)), nir_imm_int(b, 4));
+}
+
+/* Extend a `bits`-wide color component to 8 bits by bit replication:
+ * the value is shifted into the high bits and its own top bits are
+ * replicated into the vacated low bits. For bits == 4 this is exactly
+ * v * 0x11 (nibble duplication), which is cheaper.
+ */
+static nir_ssa_def *
+etc_extend(nir_builder *b, nir_ssa_def *v, int bits)
+{
+   if (bits == 4)
+      return nir_imul(b, v, nir_imm_int(b, 0x11));
+   return nir_ior(b, nir_ishl(b, v, nir_imm_int(b, 8 - bits)),
+                  nir_ushr(b, v, nir_imm_int(b, bits - (8 - bits))));
+}
+
+/* Decode one texel of an ETC2 alpha / EAC R11 64-bit payload.
+ *
+ * alpha_payload: uvec2 holding the 64-bit block (byte-swapped below).
+ * linear_pixel:  pixel index within the 4x4 block (x * 4 + y).
+ * eac:           true for the 11-bit EAC R11/R11G11 path, false for the
+ *                8-bit ETC2 alpha path.
+ * is_signed:     dynamic bool, only consulted when eac is true (SNORM).
+ *
+ * Returns the decoded component as a normalized float.
+ */
+static nir_ssa_def *
+decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel,
+                  bool eac, nir_ssa_def *is_signed)
+{
+   alpha_payload = flip_endian(b, alpha_payload, 2);
+   nir_ssa_def *alpha_x = nir_channel(b, alpha_payload, 1);
+   nir_ssa_def *alpha_y = nir_channel(b, alpha_payload, 0);
+   /* Each pixel owns a 3-bit modifier index; pixel 0's index starts at bit 45
+    * of the 64-bit block (i.e. bit 13 of the high word). */
+   nir_ssa_def *bit_offset =
+      nir_isub(b, nir_imm_int(b, 45), nir_imul(b, nir_imm_int(b, 3), linear_pixel));
+   nir_ssa_def *base = nir_ubfe(b, alpha_y, nir_imm_int(b, 24), nir_imm_int(b, 8));
+   nir_ssa_def *multiplier = nir_ubfe(b, alpha_y, nir_imm_int(b, 20), nir_imm_int(b, 4));
+   nir_ssa_def *table = nir_ubfe(b, alpha_y, nir_imm_int(b, 16), nir_imm_int(b, 4));
+
+   if (eac) {
+      /* EAC scales the 8-bit base/4-bit multiplier up to the 11-bit range;
+       * signed blocks reinterpret the base as a signed byte. */
+      nir_ssa_def *signed_base = nir_ibfe(b, alpha_y, nir_imm_int(b, 24), nir_imm_int(b, 8));
+      signed_base = nir_imul(b, signed_base, nir_imm_int(b, 8));
+      base = nir_iadd(b, nir_imul(b, base, nir_imm_int(b, 8)), nir_imm_int(b, 4));
+      base = nir_bcsel(b, is_signed, signed_base, base);
+      multiplier = nir_imax(b, nir_imul(b, multiplier, nir_imm_int(b, 8)), nir_imm_int(b, 1));
+   }
+
+   /* The 3-bit index may straddle the two words: read the 2 low bits and the
+    * 1 high bit separately, picking the word by whether the bit offset has
+    * crossed into the high word. */
+   nir_ssa_def *lsb_index =
+      nir_ubfe(b, nir_bcsel(b, nir_uge(b, bit_offset, nir_imm_int(b, 32)), alpha_y, alpha_x),
+               nir_iand(b, bit_offset, nir_imm_int(b, 31)), nir_imm_int(b, 2));
+   bit_offset = nir_iadd(b, bit_offset, nir_imm_int(b, 2));
+   nir_ssa_def *msb =
+      nir_ubfe(b, nir_bcsel(b, nir_uge(b, bit_offset, nir_imm_int(b, 32)), alpha_y, alpha_x),
+               nir_iand(b, bit_offset, nir_imm_int(b, 31)), nir_imm_int(b, 1));
+   /* msb == 0 selects the negative modifier: x ^ -1 == -x - 1. */
+   nir_ssa_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index),
+                               nir_isub(b, msb, nir_imm_int(b, 1)));
+   nir_ssa_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));
+
+   /* Clamp and normalize: 8-bit UNORM by default; for EAC, 11-bit UNORM or
+    * 10-bit-plus-sign SNORM. */
+   nir_ssa_def *low_bound = nir_imm_int(b, 0);
+   nir_ssa_def *high_bound = nir_imm_int(b, 255);
+   nir_ssa_def *final_mult = nir_imm_float(b, 1 / 255.0);
+   if (eac) {
+      low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound);
+      high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047));
+      final_mult =
+         nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0));
+   }
+
+   return nir_fmul(b, nir_i2f32(b, nir_iclamp(b, a, low_bound, high_bound)), final_mult);
+}
+
+/* Build the ETC2/EAC decode compute shader.
+ *
+ * Descriptor set 0:
+ *   binding 0: source texture (2D-array or 3D view of the compressed data)
+ *   binding 1: destination storage image (2D-array or 3D)
+ * Push constants (20 bytes):
+ *   bytes 0-11:  ivec3 offset of the decoded region
+ *   bytes 12-15: VkFormat of the compressed image
+ *   bytes 16-19: VkImageType (selects the 2D-array vs 3D descriptors)
+ * Each invocation decodes exactly one destination texel; workgroups are 8x8.
+ */
+static nir_shader *
+build_shader(struct radv_device *dev)
+{
+   const struct glsl_type *sampler_type_2d =
+      glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_FLOAT);
+   const struct glsl_type *sampler_type_3d =
+      glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_FLOAT);
+   const struct glsl_type *img_type_2d =
+      glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
+   const struct glsl_type *img_type_3d =
+      glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT);
+   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_decode_etc");
+   b.shader->info.workgroup_size[0] = 8;
+   b.shader->info.workgroup_size[1] = 8;
+   b.shader->info.workgroup_size[2] = 1;
+
+   nir_variable *input_img_2d =
+      nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d");
+   input_img_2d->data.descriptor_set = 0;
+   input_img_2d->data.binding = 0;
+
+   nir_variable *input_img_3d =
+      nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d");
+   /* Fix: initialize the 3D sampler's descriptor set/binding. This previously
+    * re-assigned input_img_2d, leaving input_img_3d uninitialized. */
+   input_img_3d->data.descriptor_set = 0;
+   input_img_3d->data.binding = 0;
+
+   nir_variable *output_img_2d =
+      nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d");
+   output_img_2d->data.descriptor_set = 0;
+   output_img_2d->data.binding = 1;
+
+   nir_variable *output_img_3d =
+      nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d");
+   output_img_3d->data.descriptor_set = 0;
+   output_img_3d->data.binding = 1;
+
+   nir_ssa_def *global_id = get_global_ids(&b, 3);
+
+   nir_ssa_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+   /* Fix: the image type is pushed at byte 16, after offset.xyz and the
+    * format. .base was 0, which would have reloaded offset.x and broken the
+    * is_3d selection below. */
+   nir_ssa_def *consts2 =
+      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
+   nir_ssa_def *offset = nir_channels(&b, consts, 7);
+   nir_ssa_def *format = nir_channel(&b, consts, 3);
+   nir_ssa_def *image_type = nir_channel(&b, consts2, 0);
+   nir_ssa_def *is_3d = nir_ieq(&b, image_type, nir_imm_int(&b, VK_IMAGE_TYPE_3D));
+   nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
+   /* Each compressed block covers 4x4 texels of the destination. */
+   nir_ssa_def *src_coord =
+      nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
+               nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));
+
+   /* Fetch the raw 128-bit block payload through the descriptor matching the
+    * image dimensionality. */
+   nir_variable *payload_var =
+      nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload");
+   nir_push_if(&b, is_3d);
+   {
+      nir_ssa_def *tex_deref = &nir_build_deref_var(&b, input_img_3d)->dest.ssa;
+
+      nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+      tex->sampler_dim = GLSL_SAMPLER_DIM_3D;
+      tex->op = nir_texop_txf;
+      tex->src[0].src_type = nir_tex_src_coord;
+      tex->src[0].src = nir_src_for_ssa(src_coord);
+      tex->src[1].src_type = nir_tex_src_lod;
+      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+      tex->src[2].src_type = nir_tex_src_texture_deref;
+      tex->src[2].src = nir_src_for_ssa(tex_deref);
+      tex->dest_type = nir_type_uint32;
+      tex->is_array = false;
+      tex->coord_components = 3;
+
+      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+      nir_builder_instr_insert(&b, &tex->instr);
+      nir_store_var(&b, payload_var, &tex->dest.ssa, 0xf);
+   }
+   nir_push_else(&b, NULL);
+   {
+      nir_ssa_def *tex_deref = &nir_build_deref_var(&b, input_img_2d)->dest.ssa;
+
+      nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+      tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+      tex->op = nir_texop_txf;
+      tex->src[0].src_type = nir_tex_src_coord;
+      tex->src[0].src = nir_src_for_ssa(src_coord);
+      tex->src[1].src_type = nir_tex_src_lod;
+      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+      tex->src[2].src_type = nir_tex_src_texture_deref;
+      tex->src[2].src = nir_src_for_ssa(tex_deref);
+      tex->dest_type = nir_type_uint32;
+      tex->is_array = true;
+      tex->coord_components = 3;
+
+      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+      nir_builder_instr_insert(&b, &tex->instr);
+      nir_store_var(&b, payload_var, &tex->dest.ssa, 0xf);
+   }
+   nir_pop_if(&b, NULL);
+
+   /* Position within the 4x4 block, and the column-major linear pixel index
+    * used by the per-pixel bitfields. */
+   nir_ssa_def *pixel_coord = nir_iand(&b, nir_channels(&b, coord, 3), nir_imm_ivec2(&b, 3, 3));
+   nir_ssa_def *linear_pixel =
+      nir_iadd(&b, nir_imul(&b, nir_channel(&b, pixel_coord, 0), nir_imm_int(&b, 4)),
+               nir_channel(&b, pixel_coord, 1));
+
+   nir_ssa_def *payload = nir_load_var(&b, payload_var);
+   nir_variable *color =
+      nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color");
+   nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf);
+   /* Formats below EAC_R11 are the ETC2 RGB(A) family; the else branch
+    * handles the EAC R11/R11G11 formats. */
+   nir_push_if(&b, nir_ilt(&b, format, nir_imm_int(&b, VK_FORMAT_EAC_R11_UNORM_BLOCK)));
+   {
+      nir_ssa_def *alpha_bits_8 =
+         nir_ige(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));
+      nir_ssa_def *alpha_bits_1 =
+         nir_iand(&b, nir_ige(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK)),
+                  nir_ilt(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK)));
+
+      /* With an 8-bit alpha plane the color payload is the second 64-bit
+       * word; otherwise it is the first. */
+      nir_ssa_def *color_payload =
+         nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3));
+      color_payload = flip_endian(&b, color_payload, 2);
+      nir_ssa_def *color_y = nir_channel(&b, color_payload, 0);
+      nir_ssa_def *color_x = nir_channel(&b, color_payload, 1);
+      nir_ssa_def *flip =
+         nir_ine(&b, nir_iand(&b, color_y, nir_imm_int(&b, 1)), nir_imm_int(&b, 0));
+      /* The flip bit selects whether the two ETC1 subblocks split the block
+       * horizontally or vertically. */
+      nir_ssa_def *subblock = nir_ushr_imm(
+         &b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)),
+         1);
+
+      /* Punch-through alpha: A1 formats with the opaque bit cleared. */
+      nir_variable *punchthrough =
+         nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough");
+      nir_ssa_def *punchthrough_init =
+         nir_iand(&b, alpha_bits_1,
+                  nir_ieq(&b, nir_iand(&b, color_y, nir_imm_int(&b, 2)), nir_imm_int(&b, 0)));
+      nir_store_var(&b, punchthrough, punchthrough_init, 0x1);
+
+      /* Set when the block decodes with the shared ETC1 modifier path
+       * (individual or differential mode). */
+      nir_variable *etc1_compat =
+         nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat");
+      nir_store_var(&b, etc1_compat, nir_imm_bool(&b, false), 0x1);
+
+      nir_variable *alpha_result =
+         nir_variable_create(b.shader, nir_var_shader_temp, glsl_float_type(), "alpha_result");
+      nir_push_if(&b, alpha_bits_8);
+      {
+         nir_store_var(
+            &b, alpha_result,
+            decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL), 1);
+      }
+      nir_push_else(&b, NULL);
+      {
+         nir_store_var(&b, alpha_result, nir_imm_float(&b, 1.0), 1);
+      }
+      nir_pop_if(&b, NULL);
+
+      const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
+      nir_variable *rgb_result =
+         nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result");
+      nir_variable *base_rgb =
+         nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb");
+      nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7);
+
+      /* Per-pixel 2-bit modifier index: msb lives at bit 16 + linear_pixel
+       * (shifted so it lands in bit 1), lsb at bit linear_pixel. */
+      nir_ssa_def *msb =
+         nir_iand(&b, nir_ushr(&b, color_x, nir_iadd(&b, nir_imm_int(&b, 15), linear_pixel)),
+                  nir_imm_int(&b, 2));
+      nir_ssa_def *lsb = nir_iand(&b, nir_ushr(&b, color_x, linear_pixel), nir_imm_int(&b, 1));
+
+      /* Individual mode (diff bit clear, and not a punch-through format). */
+      nir_push_if(
+         &b, nir_iand(&b, nir_inot(&b, alpha_bits_1),
+                      nir_ieq(&b, nir_iand(&b, color_y, nir_imm_int(&b, 2)), nir_imm_int(&b, 0))));
+      {
+         nir_store_var(&b, etc1_compat, nir_imm_bool(&b, true), 1);
+         /* Two independent 4-bit base colors, one per subblock. */
+         nir_ssa_def *tmp[3];
+         for (unsigned i = 0; i < 3; ++i)
+            tmp[i] =
+               etc_extend(&b,
+                          nir_iand(&b,
+                                   nir_ushr(&b, color_y,
+                                            nir_isub(&b, nir_imm_int(&b, 28 - 8 * i),
+                                                     nir_imul(&b, subblock, nir_imm_int(&b, 4)))),
+                                   nir_imm_int(&b, 0xf)),
+                          4);
+         nir_store_var(&b, base_rgb, nir_vec(&b, tmp, 3), 0x7);
+      }
+      nir_push_else(&b, NULL);
+      {
+         /* Differential mode: a 5-bit base plus a signed 3-bit delta. An
+          * out-of-range sum selects one of the ETC2 modes (T, H, planar). */
+         nir_ssa_def *rb = nir_ubfe(&b, color_y, nir_imm_int(&b, 27), nir_imm_int(&b, 5));
+         nir_ssa_def *rd = nir_ibfe(&b, color_y, nir_imm_int(&b, 24), nir_imm_int(&b, 3));
+         nir_ssa_def *gb = nir_ubfe(&b, color_y, nir_imm_int(&b, 19), nir_imm_int(&b, 5));
+         nir_ssa_def *gd = nir_ibfe(&b, color_y, nir_imm_int(&b, 16), nir_imm_int(&b, 3));
+         nir_ssa_def *bb = nir_ubfe(&b, color_y, nir_imm_int(&b, 11), nir_imm_int(&b, 5));
+         nir_ssa_def *bd = nir_ibfe(&b, color_y, nir_imm_int(&b, 8), nir_imm_int(&b, 3));
+         nir_ssa_def *r1 = nir_iadd(&b, rb, rd);
+         nir_ssa_def *g1 = nir_iadd(&b, gb, gd);
+         nir_ssa_def *b1 = nir_iadd(&b, bb, bd);
+
+         /* Red overflow -> T mode. */
+         nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), r1));
+         {
+            nir_ssa_def *r0 =
+               nir_ior(&b, nir_ubfe(&b, color_y, nir_imm_int(&b, 24), nir_imm_int(&b, 2)),
+                       nir_ishl(&b, nir_ubfe(&b, color_y, nir_imm_int(&b, 27), nir_imm_int(&b, 2)),
+                                nir_imm_int(&b, 2)));
+            nir_ssa_def *g0 = nir_ubfe(&b, color_y, nir_imm_int(&b, 20), nir_imm_int(&b, 4));
+            nir_ssa_def *b0 = nir_ubfe(&b, color_y, nir_imm_int(&b, 16), nir_imm_int(&b, 4));
+            nir_ssa_def *r2 = nir_ubfe(&b, color_y, nir_imm_int(&b, 12), nir_imm_int(&b, 4));
+            nir_ssa_def *g2 = nir_ubfe(&b, color_y, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+            nir_ssa_def *b2 = nir_ubfe(&b, color_y, nir_imm_int(&b, 4), nir_imm_int(&b, 4));
+            nir_ssa_def *da =
+               nir_ior(&b,
+                       nir_ishl(&b, nir_ubfe(&b, color_y, nir_imm_int(&b, 2), nir_imm_int(&b, 2)),
+                                nir_imm_int(&b, 1)),
+                       nir_iand(&b, color_y, nir_imm_int(&b, 1)));
+            nir_ssa_def *dist = etc2_distance_lookup(&b, da);
+            nir_ssa_def *index = nir_ior(&b, lsb, msb);
+
+            /* In punch-through blocks index 2 is the transparent texel. */
+            nir_store_var(&b, punchthrough,
+                          nir_iand(&b, nir_load_var(&b, punchthrough),
+                                   nir_ieq(&b, nir_iadd(&b, lsb, msb), nir_imm_int(&b, 2))),
+                          0x1);
+            nir_push_if(&b, nir_ieq(&b, index, nir_imm_int(&b, 0)));
+            {
+               nir_store_var(&b, rgb_result, etc_extend(&b, nir_vec3(&b, r0, g0, b0), 4), 0x7);
+            }
+            nir_push_else(&b, NULL);
+            {
+
+               nir_ssa_def *tmp =
+                  nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4),
+                           nir_imul(&b, dist, nir_isub(&b, nir_imm_int(&b, 2), index)));
+               nir_store_var(&b, rgb_result, tmp, 0x7);
+            }
+            nir_pop_if(&b, NULL);
+         }
+         nir_push_else(&b, NULL);
+         /* Green overflow -> H mode. */
+         nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), g1));
+         {
+            nir_ssa_def *r0 = nir_ubfe(&b, color_y, nir_imm_int(&b, 27), nir_imm_int(&b, 4));
+            nir_ssa_def *g0 = nir_ior(
+               &b,
+               nir_ishl(&b, nir_ubfe(&b, color_y, nir_imm_int(&b, 24), nir_imm_int(&b, 3)),
+                        nir_imm_int(&b, 1)),
+               nir_iand(&b, nir_ushr(&b, color_y, nir_imm_int(&b, 20)), nir_imm_int(&b, 1)));
+            nir_ssa_def *b0 = nir_ior(
+               &b, nir_ubfe(&b, color_y, nir_imm_int(&b, 15), nir_imm_int(&b, 3)),
+               nir_iand(&b, nir_ushr(&b, color_y, nir_imm_int(&b, 16)), nir_imm_int(&b, 8)));
+            nir_ssa_def *r2 = nir_ubfe(&b, color_y, nir_imm_int(&b, 11), nir_imm_int(&b, 4));
+            nir_ssa_def *g2 = nir_ubfe(&b, color_y, nir_imm_int(&b, 7), nir_imm_int(&b, 4));
+            nir_ssa_def *b2 = nir_ubfe(&b, color_y, nir_imm_int(&b, 3), nir_imm_int(&b, 4));
+            nir_ssa_def *da = nir_iand(&b, color_y, nir_imm_int(&b, 4));
+            nir_ssa_def *db = nir_iand(&b, color_y, nir_imm_int(&b, 1));
+            nir_ssa_def *d = nir_iadd(&b, da, nir_imul(&b, db, nir_imm_int(&b, 2)));
+            /* The low distance bit is derived from comparing the two packed
+             * base colors. */
+            nir_ssa_def *d0 = nir_iadd(&b, nir_ishl(&b, r0, nir_imm_int(&b, 16)),
+                                       nir_iadd(&b, nir_ishl(&b, g0, nir_imm_int(&b, 8)), b0));
+            nir_ssa_def *d2 = nir_iadd(&b, nir_ishl(&b, r2, nir_imm_int(&b, 16)),
+                                       nir_iadd(&b, nir_ishl(&b, g2, nir_imm_int(&b, 8)), b2));
+            d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd(&b, d, nir_imm_int(&b, 1)), d);
+            nir_ssa_def *dist = etc2_distance_lookup(&b, d);
+            nir_ssa_def *base = nir_bcsel(&b, nir_ine(&b, msb, nir_imm_int(&b, 0)),
+                                          nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0));
+            base = etc_extend(&b, base, 4);
+            base = nir_iadd(
+               &b, base,
+               nir_imul(&b, dist,
+                        nir_isub(&b, nir_imm_int(&b, 1), nir_imul(&b, lsb, nir_imm_int(&b, 2)))));
+            nir_store_var(&b, rgb_result, base, 0x7);
+            nir_store_var(&b, punchthrough,
+                          nir_iand(&b, nir_load_var(&b, punchthrough),
+                                   nir_ieq(&b, nir_iadd(&b, lsb, msb), nir_imm_int(&b, 2))),
+                          0x1);
+         }
+         nir_push_else(&b, NULL);
+         /* Blue overflow -> planar mode. */
+         nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), b1));
+         {
+            nir_ssa_def *r0 = nir_ubfe(&b, color_y, nir_imm_int(&b, 25), nir_imm_int(&b, 6));
+            nir_ssa_def *g0 = nir_ior(
+               &b, nir_ubfe(&b, color_y, nir_imm_int(&b, 17), nir_imm_int(&b, 6)),
+               nir_iand(&b, nir_ushr(&b, color_y, nir_imm_int(&b, 18)), nir_imm_int(&b, 0x40)));
+            nir_ssa_def *b0 = nir_ior(
+               &b,
+               nir_ishl(&b, nir_ubfe(&b, color_y, nir_imm_int(&b, 11), nir_imm_int(&b, 2)),
+                        nir_imm_int(&b, 3)),
+               nir_ior(
+                  &b,
+                  nir_iand(&b, nir_ushr(&b, color_y, nir_imm_int(&b, 11)), nir_imm_int(&b, 0x20)),
+                  nir_ubfe(&b, color_y, nir_imm_int(&b, 7), nir_imm_int(&b, 3))));
+            nir_ssa_def *rh =
+               nir_ior(&b, nir_iand(&b, color_y, nir_imm_int(&b, 1)),
+                       nir_ishl(&b, nir_ubfe(&b, color_y, nir_imm_int(&b, 2), nir_imm_int(&b, 5)),
+                                nir_imm_int(&b, 1)));
+            nir_ssa_def *rv = nir_ubfe(&b, color_x, nir_imm_int(&b, 13), nir_imm_int(&b, 6));
+            nir_ssa_def *gh = nir_ubfe(&b, color_x, nir_imm_int(&b, 25), nir_imm_int(&b, 7));
+            nir_ssa_def *gv = nir_ubfe(&b, color_x, nir_imm_int(&b, 6), nir_imm_int(&b, 7));
+            nir_ssa_def *bh = nir_ubfe(&b, color_x, nir_imm_int(&b, 19), nir_imm_int(&b, 6));
+            nir_ssa_def *bv = nir_ubfe(&b, color_x, nir_imm_int(&b, 0), nir_imm_int(&b, 6));
+
+            r0 = etc_extend(&b, r0, 6);
+            g0 = etc_extend(&b, g0, 7);
+            b0 = etc_extend(&b, b0, 6);
+            rh = etc_extend(&b, rh, 6);
+            rv = etc_extend(&b, rv, 6);
+            gh = etc_extend(&b, gh, 7);
+            gv = etc_extend(&b, gv, 7);
+            bh = etc_extend(&b, bh, 6);
+            bv = etc_extend(&b, bv, 6);
+
+            /* Bilinear interpolation of the three corner colors, with
+             * round-to-nearest (the +2 before the >>2). */
+            nir_ssa_def *rgb = nir_vec3(&b, r0, g0, b0);
+            nir_ssa_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb),
+                                       nir_channel(&b, pixel_coord, 0));
+            nir_ssa_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb),
+                                       nir_channel(&b, pixel_coord, 1));
+            rgb = nir_iadd(&b, rgb,
+                           nir_ishr(&b, nir_iadd(&b, nir_iadd(&b, dx, dy), nir_imm_int(&b, 2)),
+                                    nir_imm_int(&b, 2)));
+            nir_store_var(&b, rgb_result, rgb, 0x7);
+            nir_store_var(&b, punchthrough, nir_imm_bool(&b, false), 0x1);
+         }
+         nir_push_else(&b, NULL);
+         {
+            /* Plain differential mode: pick the base of this subblock. */
+            nir_store_var(&b, etc1_compat, nir_imm_bool(&b, true), 1);
+            nir_ssa_def *subblock_b = nir_ine(&b, subblock, nir_imm_int(&b, 0));
+            nir_ssa_def *tmp[] = {
+               nir_bcsel(&b, subblock_b, r1, rb),
+               nir_bcsel(&b, subblock_b, g1, gb),
+               nir_bcsel(&b, subblock_b, b1, bb),
+            };
+            nir_store_var(&b, base_rgb, etc_extend(&b, nir_vec(&b, tmp, 3), 5), 0x7);
+         }
+         nir_pop_if(&b, NULL);
+         nir_pop_if(&b, NULL);
+         nir_pop_if(&b, NULL);
+      }
+      nir_pop_if(&b, NULL);
+      /* Shared ETC1 path: apply the per-subblock intensity modifier to the
+       * base color selected above. */
+      nir_push_if(&b, nir_load_var(&b, etc1_compat));
+      {
+         nir_ssa_def *etc1_table_index =
+            nir_ubfe(&b, color_y,
+                     nir_isub(&b, nir_imm_int(&b, 5), nir_imul(&b, nir_imm_int(&b, 3), subblock)),
+                     nir_imm_int(&b, 3));
+         nir_ssa_def *sgn = nir_isub(&b, nir_imm_int(&b, 1), msb);
+         sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn);
+         nir_store_var(&b, punchthrough,
+                       nir_iand(&b, nir_load_var(&b, punchthrough),
+                                nir_ieq(&b, nir_iadd(&b, lsb, msb), nir_imm_int(&b, 2))),
+                       0x1);
+         nir_ssa_def *off =
+            nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
+         nir_ssa_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
+         nir_store_var(&b, rgb_result, result, 0x7);
+      }
+      nir_pop_if(&b, NULL);
+      /* Punch-through texels decode to fully transparent black. */
+      nir_push_if(&b, nir_load_var(&b, punchthrough));
+      {
+         nir_store_var(&b, alpha_result, nir_imm_float(&b, 0), 0x1);
+         nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 0, 0, 0), 0x7);
+      }
+      nir_pop_if(&b, NULL);
+      nir_ssa_def *col[4];
+      for (unsigned i = 0; i < 3; ++i)
+         col[i] = nir_fdiv(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)),
+                           nir_imm_float(&b, 255.0));
+      col[3] = nir_load_var(&b, alpha_result);
+      nir_store_var(&b, color, nir_vec(&b, col, 4), 0xf);
+   }
+   nir_push_else(&b, NULL);
+   { /* EAC: each 64-bit half decodes one of the R/G channels. */
+      nir_ssa_def *is_signed =
+         nir_ior(&b, nir_ieq(&b, format, nir_imm_int(&b, VK_FORMAT_EAC_R11_SNORM_BLOCK)),
+                 nir_ieq(&b, format, nir_imm_int(&b, VK_FORMAT_EAC_R11G11_SNORM_BLOCK)));
+      nir_ssa_def *val[4];
+      for (int i = 0; i < 2; ++i) {
+         val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true,
+                                    is_signed);
+      }
+      val[2] = nir_imm_float(&b, 0.0);
+      val[3] = nir_imm_float(&b, 1.0);
+      nir_store_var(&b, color, nir_vec(&b, val, 4), 0xf);
+   }
+   nir_pop_if(&b, NULL);
+
+   /* Write the decoded texel through the descriptor matching the image
+    * dimensionality. */
+   nir_ssa_def *outval = nir_load_var(&b, color);
+   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
+                                     nir_channel(&b, coord, 2), nir_ssa_undef(&b, 1, 32));
+
+   nir_push_if(&b, is_3d);
+   {
+      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord,
+                            nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
+                            .image_dim = GLSL_SAMPLER_DIM_3D);
+   }
+   nir_push_else(&b, NULL);
+   {
+      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord,
+                            nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
+                            .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
+   }
+   nir_pop_if(&b, NULL);
+   return b.shader;
+}
+
+static VkResult
+create_layout(struct radv_device *device)
+{
+   struct radv_meta_state *state = &device->meta_state;
+
+   /* Two push descriptors: binding 0 samples the compressed ETC2 data,
+    * binding 1 is the storage image that receives the decoded texels. */
+   const VkDescriptorSetLayoutBinding bindings[] = {
+      {
+         .binding = 0,
+         .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+         .descriptorCount = 1,
+         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+         .pImmutableSamplers = NULL,
+      },
+      {
+         .binding = 1,
+         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+         .descriptorCount = 1,
+         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+         .pImmutableSamplers = NULL,
+      },
+   };
+
+   const VkDescriptorSetLayoutCreateInfo ds_create_info = {
+      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+      .bindingCount = 2,
+      .pBindings = bindings,
+   };
+
+   VkResult result =
+      radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &state->alloc,
+                                     &state->etc_decode.ds_layout);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* 20 bytes of push constants: x/y/z offset, source format and image type
+    * (see the push_constants array in decode_etc()). */
+   const VkPushConstantRange pc_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0, 20};
+   const VkPipelineLayoutCreateInfo pl_create_info = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+      .setLayoutCount = 1,
+      .pSetLayouts = &state->etc_decode.ds_layout,
+      .pushConstantRangeCount = 1,
+      .pPushConstantRanges = &pc_range,
+   };
+
+   return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &state->alloc,
+                                    &state->etc_decode.p_layout);
+}
+
+static VkResult
+create_decode_pipeline(struct radv_device *device, VkPipeline *pipeline)
+{
+   VkResult result;
+
+   /* Serialize on-demand creation; another thread may have created the
+    * pipeline while we were waiting for the lock. */
+   mtx_lock(&device->meta_state.mtx);
+   if (*pipeline) {
+      mtx_unlock(&device->meta_state.mtx);
+      return VK_SUCCESS;
+   }
+
+   nir_shader *cs = build_shader(device);
+
+   /* compute shader */
+
+   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+      .module = vk_shader_module_handle_from_nir(cs),
+      .pName = "main",
+      .pSpecializationInfo = NULL,
+   };
+
+   VkComputePipelineCreateInfo vk_pipeline_info = {
+      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+      .stage = pipeline_shader_stage,
+      .flags = 0,
+      /* Use the ETC decode layout created by create_layout(), not
+       * resolve_compute's: the pipeline layout must match the descriptor
+       * bindings and push-constant range that decode_etc() uses when
+       * dispatching with this pipeline. */
+      .layout = device->meta_state.etc_decode.p_layout,
+   };
+
+   result = radv_CreateComputePipelines(radv_device_to_handle(device),
+                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+                                        &vk_pipeline_info, NULL, pipeline);
+
+   /* The NIR shader is ours to free on both the success and failure path. */
+   ralloc_free(cs);
+   mtx_unlock(&device->meta_state.mtx);
+   return result;
+}
+
+VkResult
+radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand)
+{
+   struct radv_meta_state *state = &device->meta_state;
+
+   /* Nothing to set up when the HW supports ETC2 natively. */
+   if (!device->physical_device->emulate_etc2)
+      return VK_SUCCESS;
+
+   VkResult res = create_layout(device);
+
+   /* With on-demand compilation only the layouts are created eagerly; the
+    * pipeline itself is built on first use. */
+   if (res == VK_SUCCESS && !on_demand)
+      res = create_decode_pipeline(device, &state->etc_decode.pipeline);
+
+   if (res != VK_SUCCESS)
+      radv_device_finish_meta_etc_decode_state(device);
+
+   return res;
+}
+
+void
+radv_device_finish_meta_etc_decode_state(struct radv_device *device)
+{
+   struct radv_meta_state *state = &device->meta_state;
+   VkDevice dev_h = radv_device_to_handle(device);
+
+   /* vkDestroy* entry points accept VK_NULL_HANDLE, so this is safe to call
+    * on partially initialized state. */
+   radv_DestroyPipeline(dev_h, state->etc_decode.pipeline, &state->alloc);
+   radv_DestroyPipelineLayout(dev_h, state->etc_decode.p_layout, &state->alloc);
+   radv_DestroyDescriptorSetLayout(dev_h, state->etc_decode.ds_layout, &state->alloc);
+}
+
+static VkPipeline
+radv_get_etc_decode_pipeline(struct radv_cmd_buffer *cmd_buffer)
+{
+   struct radv_device *device = cmd_buffer->device;
+   VkPipeline *pipeline = &device->meta_state.etc_decode.pipeline;
+
+   /* Build the pipeline lazily when meta state was initialized on demand. */
+   if (*pipeline == VK_NULL_HANDLE) {
+      VkResult ret = create_decode_pipeline(device, pipeline);
+      if (ret != VK_SUCCESS) {
+         /* Record the failure on the command buffer; callers treat a null
+          * handle as "skip the dispatch". */
+         cmd_buffer->record_result = ret;
+         return VK_NULL_HANDLE;
+      }
+   }
+
+   return *pipeline;
+}
+
+static void
+decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+           struct radv_image_view *dest_iview, const VkOffset3D *offset, const VkExtent3D *extent)
+{
+   struct radv_device *device = cmd_buffer->device;
+
+   /* Bind the source (sampled uint view of the compressed data) and the
+    * destination (storage view receiving decoded texels).  This must use the
+    * ETC decode pipeline layout created by create_layout() — not
+    * resolve_compute's — so the descriptor set and push-constant state are
+    * compatible with the pipeline bound below. */
+   radv_meta_push_descriptor_set(
+      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.etc_decode.p_layout,
+      0, /* set */
+      2, /* descriptorWriteCount */
+      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                                .dstBinding = 0,
+                                .dstArrayElement = 0,
+                                .descriptorCount = 1,
+                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+                                .pImageInfo =
+                                   (VkDescriptorImageInfo[]){
+                                      {.sampler = VK_NULL_HANDLE,
+                                       .imageView = radv_image_view_to_handle(src_iview),
+                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+                                   }},
+                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                                .dstBinding = 1,
+                                .dstArrayElement = 0,
+                                .descriptorCount = 1,
+                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                                .pImageInfo = (VkDescriptorImageInfo[]){
+                                   {
+                                      .sampler = VK_NULL_HANDLE,
+                                      .imageView = radv_image_view_to_handle(dest_iview),
+                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+                                   },
+                                }}});
+
+   VkPipeline pipeline = radv_get_etc_decode_pipeline(cmd_buffer);
+
+   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+                        pipeline);
+
+   /* Matches the 20-byte push constant range of the ETC decode layout:
+    * region offset (x/y/z), source format and image type. */
+   unsigned push_constants[5] = {
+      offset->x, offset->y, offset->z, src_iview->image->vk_format, src_iview->image->type,
+   };
+
+   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+                         device->meta_state.etc_decode.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+                         0, 20, push_constants);
+   radv_unaligned_dispatch(cmd_buffer, extent->width, extent->height, extent->depth);
+}
+
+/* Decompress the given region of an emulated-ETC2 image with the compute
+ * decoder: the compressed blocks are read through a uint view and the
+ * decoded texels are written to a second plane of the same image.
+ * Called after copy operations so apps always see decoded data.
+ * NOTE(review): the `layout` parameter is currently unused in this body. */
+void
+radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+                     VkImageLayout layout, const VkImageSubresourceLayers *subresource,
+                     VkOffset3D offset, VkExtent3D extent)
+{
+   /* Save compute pipeline, push constants and descriptors so the app's
+    * state is untouched after this meta operation. */
+   struct radv_meta_saved_state saved_state;
+   radv_meta_save(
+      &saved_state, cmd_buffer,
+      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+   /* Meta dispatches must not be skipped by conditional rendering. */
+   bool old_predicating = cmd_buffer->state.predicating;
+   cmd_buffer->state.predicating = false;
+
+   uint32_t base_slice = radv_meta_get_iview_layer(image, subresource, &offset);
+   uint32_t slice_count = image->type == VK_IMAGE_TYPE_3D ? extent.depth : subresource->layerCount;
+
+   extent = radv_sanitize_image_extent(image->type, extent);
+   offset = radv_sanitize_image_offset(image->type, offset);
+
+   /* 128-bit blocks load as uvec4, 64-bit blocks as uvec2. */
+   VkFormat load_format = vk_format_get_blocksize(image->vk_format) == 16
+                             ? VK_FORMAT_R32G32B32A32_UINT
+                             : VK_FORMAT_R32G32_UINT;
+   struct radv_image_view src_iview;
+   radv_image_view_init(
+      &src_iview, cmd_buffer->device,
+      &(VkImageViewCreateInfo){
+         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+         .image = radv_image_to_handle(image),
+         .viewType = radv_meta_get_view_type(image),
+         .format = load_format,
+         .subresourceRange =
+            {
+               /* NOTE(review): plane 0 appears to hold the raw compressed
+                * data of the emulated image — confirm against how radv
+                * creates emulated-ETC2 images. */
+               .aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT,
+               .baseMipLevel = subresource->mipLevel,
+               .levelCount = 1,
+               .baseArrayLayer = 0,
+               /* The view spans layers [0, base + count) so that base_slice,
+                * passed as the z offset, indexes absolutely from layer 0. */
+               .layerCount = subresource->baseArrayLayer + subresource->layerCount,
+            },
+      },
+      NULL);
+
+   /* Store through a view whose format matches the decoded output of the
+    * shader: R16(/R16G16) (S)NORM for the EAC R11(/R11G11) formats, plain
+    * RGBA8 for everything else. */
+   VkFormat store_format;
+   switch (image->vk_format) {
+   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
+      store_format = VK_FORMAT_R16_UNORM;
+      break;
+   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+      store_format = VK_FORMAT_R16_SNORM;
+      break;
+   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
+      store_format = VK_FORMAT_R16G16_UNORM;
+      break;
+   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+      store_format = VK_FORMAT_R16G16_SNORM;
+      break;
+   default:
+      store_format = VK_FORMAT_R8G8B8A8_UNORM;
+   }
+   struct radv_image_view dest_iview;
+   radv_image_view_init(
+      &dest_iview, cmd_buffer->device,
+      &(VkImageViewCreateInfo){
+         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+         .image = radv_image_to_handle(image),
+         .viewType = radv_meta_get_view_type(image),
+         .format = store_format,
+         .subresourceRange =
+            {
+               /* NOTE(review): plane 1 appears to be the decoded plane the
+                * shader writes — confirm against image creation. */
+               .aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT,
+               .baseMipLevel = subresource->mipLevel,
+               .levelCount = 1,
+               .baseArrayLayer = 0,
+               .layerCount = subresource->baseArrayLayer + subresource->layerCount,
+            },
+      },
+      NULL);
+
+   /* One dispatch covers the whole region; z carries the base layer (or
+    * depth offset for 3D images). */
+   decode_etc(cmd_buffer, &src_iview, &dest_iview, &(VkOffset3D){offset.x, offset.y, base_slice},
+              &(VkExtent3D){extent.width, extent.height, slice_count});
+
+   radv_image_view_finish(&src_iview);
+   radv_image_view_finish(&dest_iview);
+
+   /* Restore predication and the saved compute state. */
+   cmd_buffer->state.predicating = old_predicating;
+   radv_meta_restore(&saved_state, cmd_buffer);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 7378fc81995..4226640ae70 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -669,6 +669,12 @@ struct radv_meta_state {
       VkPipelineLayout copy_p_layout;
       VkPipeline copy_pipeline;
    } accel_struct_build;
+
+   struct {
+      VkDescriptorSetLayout ds_layout;
+      VkPipelineLayout p_layout;
+      VkPipeline pipeline;
+   } etc_decode;
 };
 
 /* queue types */



More information about the mesa-commit mailing list