Mesa (main): radv,aco: don't include FMASK in the storage descriptor

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Oct 7 17:52:54 UTC 2021


Module: Mesa
Branch: main
Commit: 1fb63367a8ca0b0fc64e278a8312b74b0ddde415
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1fb63367a8ca0b0fc64e278a8312b74b0ddde415

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Aug  4 18:19:20 2021 +0100

radv,aco: don't include FMASK in the storage descriptor

We perform an FMASK expand when transitioning to the GENERAL or
TRANSFER_DST layout, so storage images always have an identity FMASK.

radeonsi doesn't appear to expand the FMASK for read-only storage images,
so the sample index adjustment is still needed there.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12214>

---

 src/amd/compiler/aco_instruction_selection.cpp | 92 +-------------------------
 src/amd/llvm/ac_nir_to_llvm.c                  |  3 +
 src/amd/vulkan/radv_descriptor_set.c           | 23 ++++++-
 src/amd/vulkan/radv_image.c                    |  2 +-
 src/amd/vulkan/radv_nir_to_llvm.c              |  3 +
 5 files changed, 29 insertions(+), 94 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index cc471a074a8..f698cc63c37 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5992,74 +5992,6 @@ visit_bvh64_intersect_ray_amd(isel_context* ctx, nir_intrinsic_instr* instr)
    mimg->r128 = true;
 }
 
-/* Adjust the sample index according to FMASK.
- *
- * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
- * which is the identity mapping. Each nibble says which physical sample
- * should be fetched to get that sample.
- *
- * For example, 0x11111100 means there are only 2 samples stored and
- * the second sample covers 3/4 of the pixel. When reading samples 0
- * and 1, return physical sample 0 (determined by the first two 0s
- * in FMASK), otherwise return physical sample 1.
- *
- * The sample index should be adjusted as follows:
- *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
- */
-static Temp
-adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector<Temp>& coords,
-                                Operand sample_index, Temp fmask_desc_ptr)
-{
-   Builder bld(ctx->program, ctx->block);
-   Temp fmask = bld.tmp(v1);
-   unsigned dim = ctx->options->chip_class >= GFX10
-                     ? ac_get_sampler_dim(ctx->options->chip_class, GLSL_SAMPLER_DIM_2D, da)
-                     : 0;
-
-   MIMG_instruction* load = emit_mimg(bld, aco_opcode::image_load, Definition(fmask),
-                                      fmask_desc_ptr, Operand(s4), coords);
-   load->glc = false;
-   load->dlc = false;
-   load->dmask = 0x1;
-   load->unrm = true;
-   load->da = da;
-   load->dim = dim;
-
-   /* Don't adjust the sample index if WORD1.DATA_FORMAT of the FMASK
-    * resource descriptor is 0 (invalid),
-    */
-   Temp is_not_null = bld.tmp(bld.lm);
-   bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
-                emit_extract_vector(ctx, fmask_desc_ptr, 1, s1))
-      .def(0)
-      .setHint(vcc);
-   fmask =
-      bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0x76543210), fmask, is_not_null);
-
-   Operand sample_index4;
-   if (sample_index.isConstant()) {
-      if (sample_index.constantValue() < 16) {
-         sample_index4 = Operand::c32(sample_index.constantValue() << 2);
-      } else {
-         sample_index4 = Operand::zero();
-      }
-   } else if (sample_index.regClass() == s1) {
-      sample_index4 = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), sample_index,
-                               Operand::c32(2u));
-   } else {
-      assert(sample_index.regClass() == v1);
-      sample_index4 =
-         bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), sample_index);
-   }
-
-   if (sample_index4.isConstant() && sample_index4.constantValue() == 0)
-      return bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask);
-   else if (sample_index4.isConstant() && sample_index4.constantValue() == 28)
-      return bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask);
-   else
-      return bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u));
-}
-
 static std::vector<Temp>
 get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr)
 {
@@ -6076,28 +6008,8 @@ get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr)
    std::vector<Temp> coords(count);
    Builder bld(ctx->program, ctx->block);
 
-   if (is_ms) {
-      count--;
-      Temp src2 = get_ssa_temp(ctx, instr->src[2].ssa);
-      /* get sample index */
-      if (instr->intrinsic == nir_intrinsic_image_deref_load ||
-          instr->intrinsic == nir_intrinsic_image_deref_sparse_load) {
-         nir_const_value* sample_cv = nir_src_as_const_value(instr->src[2]);
-         Operand sample_index = sample_cv ? Operand::c32(sample_cv->u32)
-                                          : Operand(emit_extract_vector(ctx, src2, 0, v1));
-         std::vector<Temp> fmask_load_address;
-         for (unsigned i = 0; i < (is_array ? 3 : 2); i++)
-            fmask_load_address.emplace_back(emit_extract_vector(ctx, src0, i, v1));
-
-         Temp fmask_desc_ptr =
-            get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
-                             ACO_DESC_FMASK, nullptr, false);
-         coords[count] = adjust_sample_index_using_fmask(ctx, is_array, fmask_load_address,
-                                                         sample_index, fmask_desc_ptr);
-      } else {
-         coords[count] = emit_extract_vector(ctx, src2, 0, v1);
-      }
-   }
+   if (is_ms)
+      coords[--count] = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[2].ssa), 0, v1);
 
    if (gfx9_1d) {
       coords[0] = emit_extract_vector(ctx, src0, 0, v1);
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index b8fdd5d9dc7..c0477a21cc5 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2387,6 +2387,9 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
                                                     LLVMValueRef coord_z, LLVMValueRef sample_index,
                                                     LLVMValueRef fmask_desc_ptr)
 {
+   if (!fmask_desc_ptr)
+      return sample_index;
+
    unsigned sample_chan = coord_z ? 3 : 2;
    LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
    addr[sample_chan] = sample_index;
diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index c752fceed87..d6449742f64 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -67,6 +67,9 @@ radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVAL
          align = 16;
          break;
       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+         size = 32;
+         align = 32;
+         break;
       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
          size = 64;
          align = 32;
@@ -219,6 +222,10 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea
          alignment = 16;
          break;
       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+         set_layout->binding[b].size = 32;
+         binding_buffer_count = 1;
+         alignment = 32;
+         break;
       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
          /* main descriptor + fmask descriptor */
@@ -385,6 +392,9 @@ radv_GetDescriptorSetLayoutSupport(VkDevice device,
          descriptor_alignment = 16;
          break;
       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+         descriptor_size = 32;
+         descriptor_alignment = 32;
+         break;
       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
          descriptor_size = 64;
@@ -739,11 +749,11 @@ radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pC
       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
       case VK_DESCRIPTOR_TYPE_SAMPLER:
       case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
          /* 32 as we may need to align for images */
          bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
          break;
       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
          bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
          break;
@@ -1169,8 +1179,11 @@ radv_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *
             write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
                                           writeset->pTexelBufferView[j]);
             break;
-         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
          case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+            write_image_descriptor(device, cmd_buffer, 32, ptr, buffer_list,
+                                   writeset->descriptorType, writeset->pImageInfo + j);
+            break;
+         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
          case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
             write_image_descriptor(device, cmd_buffer, 64, ptr, buffer_list,
                                    writeset->descriptorType, writeset->pImageInfo + j);
@@ -1436,8 +1449,12 @@ radv_update_descriptor_set_with_template(struct radv_device *device,
             write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
                                           *(VkBufferView *)pSrc);
             break;
-         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
          case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+            write_image_descriptor(device, cmd_buffer, 32, pDst, buffer_list,
+                                   templ->entry[i].descriptor_type,
+                                   (struct VkDescriptorImageInfo *)pSrc);
+            break;
+         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
          case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
             write_image_descriptor(device, cmd_buffer, 64, pDst, buffer_list,
                                    templ->entry[i].descriptor_type,
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 05398b4ba4c..bd826c9b463 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -1810,7 +1810,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
       vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
       vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
       iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
-      descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
+      descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor);
 
    const struct legacy_surf_level *base_level_info = NULL;
    if (device->physical_device->rad_info.chip_class <= GFX9) {
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 5de1031beea..642b06e4491 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -497,6 +497,9 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign
 
    assert(base_index < layout->binding_count);
 
+   if (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && desc_type == AC_DESC_FMASK)
+      return NULL;
+
    switch (desc_type) {
    case AC_DESC_IMAGE:
       type = ctx->ac.v8i32;



More information about the mesa-commit mailing list