Mesa (master): radeonsi: add an option to enable 2x2 coarse shading for non-GUI elements

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Nov 17 22:52:34 UTC 2020


Module: Mesa
Branch: master
Commit: c3432ad852449ec31580a0b77af785e37eaa48f9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3432ad852449ec31580a0b77af785e37eaa48f9

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Wed Nov 11 11:41:49 2020 -0500

radeonsi: add an option to enable 2x2 coarse shading for non-GUI elements

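This is for experiments with VRS (variable rate shading). The vrs2x2 option
is off by default and is forced off on chips older than GFX10.3.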

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7646>

---

 src/gallium/drivers/radeonsi/si_debug_options.h  |  1 +
 src/gallium/drivers/radeonsi/si_gfx_cs.c         |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c           |  3 +++
 src/gallium/drivers/radeonsi/si_shader_llvm_vs.c | 31 ++++++++++++++++++++++--
 src/gallium/drivers/radeonsi/si_state.c          | 31 +++++++++++++++++++++---
 src/gallium/drivers/radeonsi/si_state.h          |  4 ++-
 src/gallium/drivers/radeonsi/si_state_shaders.c  |  8 ++++++
 7 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h
index 306ec92950d..b550be72ba8 100644
--- a/src/gallium/drivers/radeonsi/si_debug_options.h
+++ b/src/gallium/drivers/radeonsi/si_debug_options.h
@@ -12,5 +12,6 @@ OPT_BOOL(no_infinite_interp, false, "Kill PS with infinite interp coeff")
 OPT_BOOL(clamp_div_by_zero, false, "Clamp div by zero (x / 0 becomes FLT_MAX instead of NaN)")
 OPT_BOOL(no_trunc_coord, false, "Always set TRUNC_COORD=0")
 OPT_BOOL(shader_culling, false, "Cull primitives in shaders when benefical (without tess and GS)")
+OPT_BOOL(vrs2x2, false, "Enable 2x2 coarse shading for non-GUI elements")
 
 #undef OPT_BOOL
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 75986169d9a..4747ffd5a35 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -347,6 +347,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
    ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
    ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
    ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000;
+   ctx->tracked_regs.reg_value[SI_TRACKED_DB_VRS_OVERRIDE_CNTL] = 0x00000000;
    ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000;
    ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000;
    ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 8ffe3f96ede..9255002c33a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1080,6 +1080,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
 #include "si_debug_options.h"
    }
 
+   if (sscreen->info.chip_class < GFX10_3)
+      sscreen->options.vrs2x2 = false;
+
    si_disk_cache_create(sscreen);
 
    /* Determine the number of shader compiler threads. */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
index bafe964c84c..2d766532cbe 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -605,12 +605,13 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
 
    bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
    bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
+   bool writes_vrs = ctx->screen->options.vrs2x2;
 
    /* Write the misc vector (point size, edgeflag, layer, viewport). */
-   if (writes_psize || pos_writes_edgeflag ||
+   if (writes_psize || pos_writes_edgeflag || writes_vrs ||
        shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
       pos_args[1].enabled_channels = writes_psize |
-                                     (pos_writes_edgeflag << 1) |
+                                     ((pos_writes_edgeflag | writes_vrs) << 1) |
                                      (shader->selector->info.writes_layer << 2);
 
       pos_args[1].valid_mask = 0; /* EXEC mask */
@@ -635,6 +636,32 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
          pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
       }
 
+      if (writes_vrs) {
+         /* Bits [2:3] = VRS rate X
+          * Bits [4:5] = VRS rate Y
+          *
+          * The range is [-2, 1]. Values:
+          *   1: 2x coarser shading rate in that direction.
+          *   0: normal shading rate
+          *  -1: 2x finer shading rate (sample shading, not directional)
+          *  -2: 4x finer shading rate (sample shading, not directional)
+          *
+          * Sample shading can't go above 8 samples, so both numbers can't be -2
+          * at the same time.
+          */
+         LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, (1 << 2) | (1 << 4), 0);
+
+         /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
+         rates = LLVMBuildSelect(ctx->ac.builder,
+                                 LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE,
+                                               pos_args[0].out[3], ctx->ac.f32_1, ""),
+                                 rates, ctx->ac.i32_0, "");
+
+         LLVMValueRef v = ac_to_integer(&ctx->ac, pos_args[1].out[1]);
+         v = LLVMBuildOr(ctx->ac.builder, v, rates, "");
+         pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
+      }
+
       if (ctx->screen->info.chip_class >= GFX9) {
          /* GFX9 has the layer in out.z[10:0] and the viewport
           * index in out.z[19:16].
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index b7b2bb2c209..3bd84e84719 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -751,7 +751,8 @@ static void si_emit_clip_regs(struct si_context *sctx)
    unsigned initial_cdw = sctx->gfx_cs->current.cdw;
    unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) |
                          S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) |
-                         S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
+                         S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 &&
+                                                           !sctx->screen->options.vrs2x2) |
                          S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
                          clipdist_mask | (culldist_mask << 8);
 
@@ -1407,6 +1408,21 @@ static void si_emit_db_render_state(struct si_context *sctx)
    radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
                               db_shader_control);
 
+   if (sctx->screen->options.vrs2x2) {
+      /* If the shader is using discard, turn off coarse shading because
+       * discard at 2x2 pixel granularity degrades quality too much.
+       *
+       * MIN allows sample shading but not coarse shading.
+       */
+      unsigned mode = G_02880C_KILL_ENABLE(db_shader_control) ? V_028064_VRS_COMB_MODE_MIN
+                                                              : V_028064_VRS_COMB_MODE_PASSTHRU;
+      radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,
+                                 SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
+                                 S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
+                                 S_028064_VRS_OVERRIDE_RATE_X(0) |
+                                 S_028064_VRS_OVERRIDE_RATE_Y(0));
+   }
+
    if (initial_cdw != sctx->gfx_cs->current.cdw)
       sctx->context_roll = true;
 }
@@ -5366,9 +5382,18 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
 
    if (sctx->chip_class >= GFX10_3) {
       si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
-      /* This allows sample shading. */
+      /* The rate combiners have no effect if they are disabled like this:
+       *   VERTEX_RATE:    BYPASS_VTX_RATE_COMBINER = 1
+       *   PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1
+       *   HTILE_RATE:     VRS_HTILE_ENCODING = 0
+       *   SAMPLE_ITER:    PS_ITER_SAMPLE = 0
+       *
+       * Use OVERRIDE, which will ignore results from previous combiners.
+       * (e.g. enabled sample shading overrides the vertex rate)
+       */
       si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL,
-                     S_028848_SAMPLE_ITER_COMBINER_MODE(1));
+                     S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
+                     S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
    }
 
    sctx->cs_preamble_state = pm4;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 9ac4d51889c..1d94d8c8c76 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -250,7 +250,8 @@ struct si_shader_data {
 #define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK                                                      \
    (S_02881C_USE_VTX_POINT_SIZE(1) | S_02881C_USE_VTX_EDGE_FLAG(1) |                               \
     S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | S_02881C_USE_VTX_VIEWPORT_INDX(1) |                   \
-    S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1))
+    S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |                       \
+    S_02881C_USE_VTX_VRS_RATE(1))
 
 /* The list of registers whose emitted values are remembered by si_context. */
 enum si_tracked_reg
@@ -283,6 +284,7 @@ enum si_tracked_reg
 
    SI_TRACKED_PA_SC_BINNER_CNTL_0,
    SI_TRACKED_DB_DFSM_CONTROL,
+   SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
 
    SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */
    SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 3d0baf04646..60441df8418 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -89,6 +89,12 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
       shader_variant_flags |= 1 << 8;
    if (sel->screen->debug_flags & DBG(GISEL))
       shader_variant_flags |= 1 << 9;
+   if ((sel->info.stage == MESA_SHADER_VERTEX ||
+        sel->info.stage == MESA_SHADER_TESS_EVAL ||
+        sel->info.stage == MESA_SHADER_GEOMETRY) &&
+       !es &&
+       sel->screen->options.vrs2x2)
+      shader_variant_flags |= 1 << 10;
 
    struct mesa_sha1 ctx;
    _mesa_sha1_init(&ctx);
@@ -1056,9 +1062,11 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
       writes_psize &= !shader->key.opt.kill_pointsize;
 
    bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
+                       sel->screen->options.vrs2x2 ||
                        sel->info.writes_layer || sel->info.writes_viewport_index;
    return S_02881C_USE_VTX_POINT_SIZE(writes_psize) |
           S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) |
+          S_02881C_USE_VTX_VRS_RATE(sel->screen->options.vrs2x2) |
           S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
           S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
           S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |



More information about the mesa-commit mailing list