Mesa (main): radeonsi: apply spi_cu_en to CU_EN

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jan 5 07:07:30 UTC 2022


Module: Mesa
Branch: main
Commit: 384014bebe10cc56238bece7499bb23e5e6eff96
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=384014bebe10cc56238bece7499bb23e5e6eff96

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Wed Dec  8 02:15:50 2021 -0500

radeonsi: apply spi_cu_en to CU_EN

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14122>

---

 src/amd/common/ac_shader_util.c                   | 18 ++++++
 src/amd/common/ac_shader_util.h                   |  4 ++
 src/gallium/drivers/radeonsi/si_build_pm4.h       |  9 +++
 src/gallium/drivers/radeonsi/si_pm4.h             |  3 +-
 src/gallium/drivers/radeonsi/si_state.c           | 29 +++++----
 src/gallium/drivers/radeonsi/si_state_shaders.cpp | 73 ++++++++++++++++-------
 6 files changed, 104 insertions(+), 32 deletions(-)

diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index 943523b88d9..52c87e56c67 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -581,3 +581,21 @@ unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims
 
    return CLAMP(workgroup_size, 1, 256);
 }
+
+void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
+                      unsigned value_shift, const struct radeon_info *info,
+                      void set_sh_reg(void*, unsigned, uint32_t))
+{
+   /* Register field position and mask. */
+   uint32_t cu_en_mask = ~clear_mask;
+   unsigned cu_en_shift = ffs(cu_en_mask) - 1;
+   /* The value being set. */
+   uint32_t cu_en = (value & cu_en_mask) >> cu_en_shift;
+
+   /* AND the field by spi_cu_en. */
+   uint32_t spi_cu_en = info->spi_cu_en >> value_shift;
+   uint32_t new_value = (value & ~cu_en_mask) |
+                        (((cu_en & spi_cu_en) << cu_en_shift) & cu_en_mask);
+
+   set_sh_reg(cs, reg_offset, new_value);
+}
diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index fcf4e48ca15..b86e81ca039 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -118,6 +118,10 @@ unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wav
 unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims,
                                        unsigned max_vtx_out, unsigned prim_amp_factor);
 
+void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
+                      unsigned value_shift, const struct radeon_info *info,
+                      void set_sh_reg(void*, unsigned, uint32_t));
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h
index 66589d6a0b3..6b461c9db6c 100644
--- a/src/gallium/drivers/radeonsi/si_build_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
@@ -279,6 +279,15 @@
    radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \
 } while (0)
 
+/* Wrappers that are only used when they are passed as function pointers. */
+static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
+                                          uint32_t value)
+{
+   radeon_begin(cs);
+   radeon_set_sh_reg(reg_offset, value);
+   radeon_end();
+}
+
 /* This should be evaluated at compile time if all parameters are constants. */
 static ALWAYS_INLINE unsigned
 si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 03f79e0ba30..8946018829d 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -31,7 +31,8 @@
 extern "C" {
 #endif
 
-#define SI_PM4_MAX_DW 176
+/* TODO: This is high because of cs_preamble with ac_set_reg_cu_en. */
+#define SI_PM4_MAX_DW 480
 
 // forward defines
 struct si_context;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e82235d7f51..8d242b85c40 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -5467,8 +5467,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
       cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa);
 
    if (sctx->chip_class >= GFX7) {
-      si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
-                     S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
+      ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+                       S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F),
+                       C_00B01C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
    }
 
    if (sctx->chip_class <= GFX8) {
@@ -5503,11 +5504,13 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
    }
 
    if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {
-      si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
-                     S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+      ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+                       S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
+                       C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
       si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
-      si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
-                     S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
+      ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+                       S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
+                       C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
 
       /* If this is 0, Bonaire can hang even if GS isn't being used.
        * Other chips are unaffected. These are suboptimal values,
@@ -5547,8 +5550,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
    }
 
    if (sctx->chip_class >= GFX9) {
-      si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
-                     S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
+      ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                       S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN,
+                       0, &sscreen->info, (void*)si_pm4_set_reg);
 
       si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
                      S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
@@ -5566,9 +5570,12 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
 
    if (sctx->chip_class >= GFX10) {
       /* Logical CUs 16 - 31 */
-      si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16));
-      si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff));
-      si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff));
+      ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
+                       C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
+      ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
+                       C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
+      ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
+                       C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg);
 
       si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
       si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index c32a014270b..9b4d6503f18 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -919,18 +919,35 @@ static void si_emit_shader_gs(struct si_context *sctx)
    radeon_end_update_context_roll(sctx);
 
    /* These don't cause any context rolls. */
-   radeon_begin_again(&sctx->gfx_cs);
-   if (sctx->chip_class >= GFX7) {
-      radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
-                            SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
-                            shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
-   }
-   if (sctx->chip_class >= GFX10) {
-      radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
-                            SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
-                            shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
+   if (sctx->screen->info.spi_cu_en_has_effect) {
+      if (sctx->chip_class >= GFX7) {
+         ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                          shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs,
+                          C_00B21C_CU_EN, 0, &sctx->screen->info,
+                          (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
+         sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
+      }
+      if (sctx->chip_class >= GFX10) {
+         ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                          shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs,
+                          C_00B204_CU_EN, 16, &sctx->screen->info,
+                          (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
+         sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS);
+      }
+   } else {
+      radeon_begin_again(&sctx->gfx_cs);
+      if (sctx->chip_class >= GFX7) {
+         radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                               SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+                               shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
+      }
+      if (sctx->chip_class >= GFX10) {
+         radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                               SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+                               shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
+      }
+      radeon_end();
    }
-   radeon_end();
 }
 
 static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -1129,13 +1146,27 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
    radeon_begin_again(&sctx->gfx_cs);
    radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
                               shader->ctx_reg.ngg.ge_pc_alloc);
-   radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
-                         SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
-                         shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
-   radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
-                         SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
-                         shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
-   radeon_end();
+   if (sctx->screen->info.spi_cu_en_has_effect) {
+      radeon_end();
+      ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                       shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs,
+                       C_00B21C_CU_EN, 0, &sctx->screen->info,
+                       (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
+      ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                       shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs,
+                       C_00B204_CU_EN, 16, &sctx->screen->info,
+                       (void (*)(void*, unsigned, uint32_t))radeon_set_sh_reg_func);
+      sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) &
+                                      ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
+   } else {
+      radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                            SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
+                            shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
+      radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                            SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
+                            shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
+      radeon_end();
+   }
 }
 
 static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
@@ -1599,8 +1630,10 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
    oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
 
    if (sscreen->info.chip_class >= GFX7) {
-      si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
-                     S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F));
+      ac_set_reg_cu_en(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+                       S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
+                       C_00B118_CU_EN, 0, &sscreen->info,
+                       (void (*)(void*, unsigned, uint32_t))si_pm4_set_reg);
       si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
    }
 



More information about the mesa-commit mailing list