Mesa (staging/22.1): radv: only apply enable_mrt_output_nan_fixup for 32-bit float MRTs

Fri Apr 22 16:02:24 UTC 2022

Module: Mesa
Branch: staging/22.1
Commit: ae8586067977921e99dc41af6caad46897f237c0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ae8586067977921e99dc41af6caad46897f237c0

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Thu Apr 21 12:41:27 2022 +0200

radv: only apply enable_mrt_output_nan_fixup for 32-bit float MRTs

This is incorrect for 32-bit integer MRTs, which are clamped to the
maximum value of the format; returning 0 instead can break some shaders.

This fixes a rendering issue with RAGE2.

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4329
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16080>
(cherry picked from commit 5121e6dd7ea9127842814239fe5a8b5dd231dd8c)

---

 .pick_status.json                 |  2 +-
 src/amd/vulkan/radv_nir_to_llvm.c | 10 +++-------
 src/amd/vulkan/radv_pipeline.c    | 28 +++++++++++++++++++---------
 src/amd/vulkan/radv_shader.h      |  4 ++--
 4 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 4e82dadb558..b65b3db9620 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -229,7 +229,7 @@
         "description": "radv: only apply enable_mrt_output_nan_fixup for 32-bit float MRTs",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 408ccc31970..49b10ff910e 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -649,6 +649,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
       unsigned col_format = (ctx->options->key.ps.col_format >> (4 * index)) & 0xf;
       bool is_int8 = (ctx->options->key.ps.is_int8 >> index) & 1;
       bool is_int10 = (ctx->options->key.ps.is_int10 >> index) & 1;
+      bool enable_mrt_output_nan_fixup = (ctx->options->key.ps.enable_mrt_output_nan_fixup >> index) & 1;
 
       LLVMValueRef (*packf)(struct ac_llvm_context * ctx, LLVMValueRef args[2]) = NULL;
       LLVMValueRef (*packi)(struct ac_llvm_context * ctx, LLVMValueRef args[2], unsigned bits,
@@ -728,13 +729,8 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
          break;
       }
 
-      /* Replace NaN by zero (only 32-bit) to fix game bugs if
-       * requested.
-       */
-      if (ctx->options->enable_mrt_output_nan_fixup && !is_16bit &&
-          (col_format == V_028714_SPI_SHADER_32_R || col_format == V_028714_SPI_SHADER_32_GR ||
-           col_format == V_028714_SPI_SHADER_32_AR || col_format == V_028714_SPI_SHADER_32_ABGR ||
-           col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
+      /* Replace NaN by zero (for 32-bit float formats) to fix game bugs if requested. */
+      if (enable_mrt_output_nan_fixup && !is_16bit) {
          for (unsigned i = 0; i < 4; i++) {
             LLVMValueRef class_args[2] = {values[i],
                                           LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN, false)};
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 431c35dfdcf..3086ef24d19 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -60,6 +60,7 @@ struct radv_blend_state {
    uint32_t spi_shader_col_format;
    uint32_t col_format_is_int8;
    uint32_t col_format_is_int10;
+   uint32_t col_format_is_float32;
    uint32_t cb_shader_mask;
    uint32_t db_alpha_to_mask;
 
@@ -529,6 +530,16 @@ format_is_int10(VkFormat format)
    return false;
 }
 
+static bool
+format_is_float32(VkFormat format)
+{
+   const struct util_format_description *desc = vk_format_description(format);
+   int channel = vk_format_get_first_non_void_channel(format);
+
+   return channel >= 0 &&
+          desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
+}
+
 static void
 radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
                                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
@@ -536,7 +547,7 @@ radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
 {
    const VkPipelineRenderingCreateInfo *render_create_info =
       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO);
-   unsigned col_format = 0, is_int8 = 0, is_int10 = 0;
+   unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0;
    unsigned num_targets;
 
    for (unsigned i = 0; i < render_create_info->colorAttachmentCount; ++i) {
@@ -555,6 +566,8 @@ radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
             is_int8 |= 1 << i;
          if (format_is_int10(fmt))
             is_int10 |= 1 << i;
+         if (format_is_float32(fmt))
+            is_float32 |= 1 << i;
       }
 
       col_format |= cf << (4 * i);
@@ -590,6 +603,7 @@ radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
    blend->spi_shader_col_format = col_format;
    blend->col_format_is_int8 = is_int8;
    blend->col_format_is_int10 = is_int10;
+   blend->col_format_is_float32 = is_float32;
 }
 
 /*
@@ -3074,7 +3088,7 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline,
       key.ps.lower_discard_to_demote = true;
 
    if (pipeline->device->instance->enable_mrt_output_nan_fixup)
-      key.ps.enable_mrt_output_nan_fixup = true;
+      key.ps.enable_mrt_output_nan_fixup = blend->col_format_is_float32;
 
 
    key.ps.force_vrs_enabled = pipeline->device->force_vrs_enabled;
@@ -3941,6 +3955,7 @@ radv_lower_fs_output(nir_shader *nir, const struct radv_pipeline_key *pipeline_k
          unsigned col_format = (pipeline_key->ps.col_format >> (4 * slot)) & 0xf;
          bool is_int8 = (pipeline_key->ps.is_int8 >> slot) & 1;
          bool is_int10 = (pipeline_key->ps.is_int10 >> slot) & 1;
+         bool enable_mrt_output_nan_fixup = (pipeline_key->ps.enable_mrt_output_nan_fixup >> slot) & 1;
          bool is_16bit = intrin->src[0].ssa->bit_size == 16;
 
          if (col_format == V_028714_SPI_SHADER_ZERO)
@@ -3958,13 +3973,8 @@ radv_lower_fs_output(nir_shader *nir, const struct radv_pipeline_key *pipeline_k
             }
          }
 
-         /* Replace NaN by zero (only 32-bit) to fix game bugs if requested. */
-         if (pipeline_key->ps.enable_mrt_output_nan_fixup && !nir->info.internal && !is_16bit &&
-             (col_format == V_028714_SPI_SHADER_32_R ||
-              col_format == V_028714_SPI_SHADER_32_GR ||
-              col_format == V_028714_SPI_SHADER_32_AR ||
-              col_format == V_028714_SPI_SHADER_32_ABGR ||
-              col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
+         /* Replace NaN by zero (for 32-bit float formats) to fix game bugs if requested. */
+         if (enable_mrt_output_nan_fixup && !nir->info.internal && !is_16bit) {
             u_foreach_bit(i, write_mask) {
                const bool save_exact = b.exact;
 
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 6cccc10e34d..c3a5ecc01c2 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -93,7 +93,7 @@ struct radv_pipeline_key {
       bool mrt0_is_dual_src;
 
       bool lower_discard_to_demote;
-      bool enable_mrt_output_nan_fixup;
+      uint8_t enable_mrt_output_nan_fixup;
       bool force_vrs_enabled;
    } ps;
 
@@ -120,7 +120,7 @@ struct radv_nir_compiler_options {
    bool record_stats;
    bool check_ir;
    bool has_ls_vgpr_init_bug;
-   bool enable_mrt_output_nan_fixup;
+   uint8_t enable_mrt_output_nan_fixup;
    bool wgp_mode;
    enum radeon_family family;
    enum chip_class chip_class;