Mesa (main): radv,aco: don't include FMASK in the storage descriptor
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Oct 7 17:52:54 UTC 2021
Module: Mesa
Branch: main
Commit: 1fb63367a8ca0b0fc64e278a8312b74b0ddde415
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1fb63367a8ca0b0fc64e278a8312b74b0ddde415
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Wed Aug 4 18:19:20 2021 +0100
radv,aco: don't include FMASK in the storage descriptor
We perform an FMASK expand when transitioning to GENERAL or TRANSFER_DST
layout, so storage images always have an identity FMASK.
radeonsi doesn't appear to expand the FMASK for read-only storage images,
so the sample index adjustment is still needed there.
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12214>
---
src/amd/compiler/aco_instruction_selection.cpp | 92 +-------------------------
src/amd/llvm/ac_nir_to_llvm.c | 3 +
src/amd/vulkan/radv_descriptor_set.c | 23 ++++++-
src/amd/vulkan/radv_image.c | 2 +-
src/amd/vulkan/radv_nir_to_llvm.c | 3 +
5 files changed, 29 insertions(+), 94 deletions(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index cc471a074a8..f698cc63c37 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5992,74 +5992,6 @@ visit_bvh64_intersect_ray_amd(isel_context* ctx, nir_intrinsic_instr* instr)
mimg->r128 = true;
}
-/* Adjust the sample index according to FMASK.
- *
- * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
- * which is the identity mapping. Each nibble says which physical sample
- * should be fetched to get that sample.
- *
- * For example, 0x11111100 means there are only 2 samples stored and
- * the second sample covers 3/4 of the pixel. When reading samples 0
- * and 1, return physical sample 0 (determined by the first two 0s
- * in FMASK), otherwise return physical sample 1.
- *
- * The sample index should be adjusted as follows:
- * sample_index = (fmask >> (sample_index * 4)) & 0xF;
- */
-static Temp
-adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector<Temp>& coords,
- Operand sample_index, Temp fmask_desc_ptr)
-{
- Builder bld(ctx->program, ctx->block);
- Temp fmask = bld.tmp(v1);
- unsigned dim = ctx->options->chip_class >= GFX10
- ? ac_get_sampler_dim(ctx->options->chip_class, GLSL_SAMPLER_DIM_2D, da)
- : 0;
-
- MIMG_instruction* load = emit_mimg(bld, aco_opcode::image_load, Definition(fmask),
- fmask_desc_ptr, Operand(s4), coords);
- load->glc = false;
- load->dlc = false;
- load->dmask = 0x1;
- load->unrm = true;
- load->da = da;
- load->dim = dim;
-
- /* Don't adjust the sample index if WORD1.DATA_FORMAT of the FMASK
- * resource descriptor is 0 (invalid),
- */
- Temp is_not_null = bld.tmp(bld.lm);
- bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
- emit_extract_vector(ctx, fmask_desc_ptr, 1, s1))
- .def(0)
- .setHint(vcc);
- fmask =
- bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0x76543210), fmask, is_not_null);
-
- Operand sample_index4;
- if (sample_index.isConstant()) {
- if (sample_index.constantValue() < 16) {
- sample_index4 = Operand::c32(sample_index.constantValue() << 2);
- } else {
- sample_index4 = Operand::zero();
- }
- } else if (sample_index.regClass() == s1) {
- sample_index4 = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), sample_index,
- Operand::c32(2u));
- } else {
- assert(sample_index.regClass() == v1);
- sample_index4 =
- bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), sample_index);
- }
-
- if (sample_index4.isConstant() && sample_index4.constantValue() == 0)
- return bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask);
- else if (sample_index4.isConstant() && sample_index4.constantValue() == 28)
- return bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask);
- else
- return bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u));
-}
-
static std::vector<Temp>
get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr)
{
@@ -6076,28 +6008,8 @@ get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr)
std::vector<Temp> coords(count);
Builder bld(ctx->program, ctx->block);
- if (is_ms) {
- count--;
- Temp src2 = get_ssa_temp(ctx, instr->src[2].ssa);
- /* get sample index */
- if (instr->intrinsic == nir_intrinsic_image_deref_load ||
- instr->intrinsic == nir_intrinsic_image_deref_sparse_load) {
- nir_const_value* sample_cv = nir_src_as_const_value(instr->src[2]);
- Operand sample_index = sample_cv ? Operand::c32(sample_cv->u32)
- : Operand(emit_extract_vector(ctx, src2, 0, v1));
- std::vector<Temp> fmask_load_address;
- for (unsigned i = 0; i < (is_array ? 3 : 2); i++)
- fmask_load_address.emplace_back(emit_extract_vector(ctx, src0, i, v1));
-
- Temp fmask_desc_ptr =
- get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
- ACO_DESC_FMASK, nullptr, false);
- coords[count] = adjust_sample_index_using_fmask(ctx, is_array, fmask_load_address,
- sample_index, fmask_desc_ptr);
- } else {
- coords[count] = emit_extract_vector(ctx, src2, 0, v1);
- }
- }
+ if (is_ms)
+ coords[--count] = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[2].ssa), 0, v1);
if (gfx9_1d) {
coords[0] = emit_extract_vector(ctx, src0, 0, v1);
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index b8fdd5d9dc7..c0477a21cc5 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2387,6 +2387,9 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
LLVMValueRef coord_z, LLVMValueRef sample_index,
LLVMValueRef fmask_desc_ptr)
{
+ if (!fmask_desc_ptr)
+ return sample_index;
+
unsigned sample_chan = coord_z ? 3 : 2;
LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
addr[sample_chan] = sample_index;
diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index c752fceed87..d6449742f64 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -67,6 +67,9 @@ radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVAL
align = 16;
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ size = 32;
+ align = 32;
+ break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
size = 64;
align = 32;
@@ -219,6 +222,10 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea
alignment = 16;
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ set_layout->binding[b].size = 32;
+ binding_buffer_count = 1;
+ alignment = 32;
+ break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
/* main descriptor + fmask descriptor */
@@ -385,6 +392,9 @@ radv_GetDescriptorSetLayoutSupport(VkDevice device,
descriptor_alignment = 16;
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ descriptor_size = 32;
+ descriptor_alignment = 32;
+ break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
descriptor_size = 64;
@@ -739,11 +749,11 @@ radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pC
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
/* 32 as we may need to align for images */
bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
break;
@@ -1169,8 +1179,11 @@ radv_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *
write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
writeset->pTexelBufferView[j]);
break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ write_image_descriptor(device, cmd_buffer, 32, ptr, buffer_list,
+ writeset->descriptorType, writeset->pImageInfo + j);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
write_image_descriptor(device, cmd_buffer, 64, ptr, buffer_list,
writeset->descriptorType, writeset->pImageInfo + j);
@@ -1436,8 +1449,12 @@ radv_update_descriptor_set_with_template(struct radv_device *device,
write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
*(VkBufferView *)pSrc);
break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ write_image_descriptor(device, cmd_buffer, 32, pDst, buffer_list,
+ templ->entry[i].descriptor_type,
+ (struct VkDescriptorImageInfo *)pSrc);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
write_image_descriptor(device, cmd_buffer, 64, pDst, buffer_list,
templ->entry[i].descriptor_type,
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 05398b4ba4c..bd826c9b463 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -1810,7 +1810,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
- descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
+ descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor);
const struct legacy_surf_level *base_level_info = NULL;
if (device->physical_device->rad_info.chip_class <= GFX9) {
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 5de1031beea..642b06e4491 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -497,6 +497,9 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign
assert(base_index < layout->binding_count);
+ if (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && desc_type == AC_DESC_FMASK)
+ return NULL;
+
switch (desc_type) {
case AC_DESC_IMAGE:
type = ctx->ac.v8i32;
More information about the mesa-commit mailing list