Mesa (master): ac,radeonsi: start adding support for gfx10.3

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 9 16:46:25 UTC 2020


Module: Mesa
Branch: master
Commit: a23802bcb9a42a02d34a5a36d6e66d6532813a0d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a23802bcb9a42a02d34a5a36d6e66d6532813a0d

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Thu Mar 26 22:02:13 2020 -0400

ac,radeonsi: start adding support for gfx10.3

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5383>

---

 src/amd/common/ac_gpu_info.c                    |  7 +++--
 src/amd/common/ac_surface.c                     |  2 ++
 src/amd/common/amd_family.h                     |  1 +
 src/amd/registers/gfx10.json                    | 41 ++++++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_perfcounter.c   |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c          |  8 +++--
 src/gallium/drivers/radeonsi/si_state.c         | 17 +++++++---
 src/gallium/drivers/radeonsi/si_state_shaders.c |  4 ++-
 8 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index a8a43fdc8ee..517de226bd9 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -582,7 +582,8 @@ bool ac_query_gpu_info(int fd, void *dev_p,
 			        info->family == CHIP_VEGA12 ||
 			        info->family == CHIP_RAVEN ||
 			        info->family == CHIP_RAVEN2 ||
-			        info->family == CHIP_RENOIR);
+			        info->family == CHIP_RENOIR ||
+				info->chip_class >= GFX10_3);
 
 	info->has_out_of_order_rast = info->chip_class >= GFX8 &&
 				      info->chip_class <= GFX9 &&
@@ -736,7 +737,9 @@ bool ac_query_gpu_info(int fd, void *dev_p,
 	if (info->chip_class >= GFX10)
 		info->num_sdp_interfaces = device_info.num_tcc_blocks;
 
-	if (info->chip_class >= GFX10)
+	if (info->chip_class >= GFX10_3)
+		info->max_wave64_per_simd = 16;
+	else if (info->chip_class == GFX10)
 		info->max_wave64_per_simd = 20;
 	else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
 		info->max_wave64_per_simd = 8;
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index cbbd86093e7..d7dd9561f6f 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -2127,6 +2127,7 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info,
          break;
 
       case GFX10:
+      case GFX10_3:
          surf->dcc_offset =
             ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
          surf->u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
@@ -2169,6 +2170,7 @@ void ac_surface_get_umd_metadata(const struct radeon_info *info,
       desc[5] |= S_008F24_META_DATA_ADDRESS(surf->dcc_offset >> 40);
       break;
    case GFX10:
+   case GFX10_3:
       desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
       desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->dcc_offset >> 8);
       desc[7] = surf->dcc_offset >> 16;
diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index ffcc1bd9240..8262a3a40b7 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -119,6 +119,7 @@ enum chip_class {
     GFX8,
     GFX9,
     GFX10,
+    GFX10_3,
 };
 
 enum ring_type {
diff --git a/src/amd/registers/gfx10.json b/src/amd/registers/gfx10.json
index 08f111c87c7..5c2b251ca43 100644
--- a/src/amd/registers/gfx10.json
+++ b/src/amd/registers/gfx10.json
@@ -16370,6 +16370,12 @@
    "name": "SX_PERFCOUNTER3_SELECT",
    "type_ref": "SX_PERFCOUNTER0_SELECT"
   },
+  {
+   "chips": ["gfx10"],
+   "map": {"at": 165712, "to": "mm"},
+   "name": "SX_PS_DOWNCONVERT_CONTROL_GFX103",
+   "type_ref": "SX_PS_DOWNCONVERT_CONTROL"
+  },
   {
    "chips": ["gfx10"],
    "map": {"at": 165716, "to": "mm"},
@@ -17248,7 +17254,9 @@
     {"bits": [14, 17], "name": "LOSSY_ALPHA_PRECISION"},
     {"bits": [18, 18], "name": "DISABLE_CONSTANT_ENCODE_REG"},
     {"bits": [19, 19], "name": "ENABLE_CONSTANT_ENCODE_REG_WRITE"},
-    {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"}
+    {"bits": [20, 20], "name": "INDEPENDENT_128B_BLOCKS"},
+    {"bits": [21, 21], "name": "SKIP_LOW_COMP_RATIO_GFX103"},
+    {"bits": [22, 22], "name": "DCC_COMPRESS_DISABLE_GFX103"}
    ]
   },
   "CB_COLOR0_INFO": {
@@ -18715,7 +18723,8 @@
     {"bits": [21, 21], "name": "PRESERVE_ZRANGE"},
     {"bits": [22, 22], "name": "PRESERVE_SRESULTS"},
     {"bits": [23, 23], "name": "DISABLE_FAST_PASS"},
-    {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"}
+    {"bits": [25, 25], "name": "ALLOW_PARTIAL_RES_HIER_KILL"},
+    {"bits": [27, 28], "name": "CENTROID_COMPUTATION_MODE_GFX103"}
    ]
   },
   "DB_RMI_L2_CACHE_CONTROL": {
@@ -19426,7 +19435,8 @@
   "PA_CL_NGG_CNTL": {
    "fields": [
     {"bits": [0, 0], "name": "VERTEX_REUSE_OFF"},
-    {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"}
+    {"bits": [1, 1], "name": "INDEX_BUF_EDGE_FLAG_ENA"},
+    {"bits": [2, 9], "name": "VERTEX_REUSE_DEPTH_GFX103"}
    ]
   },
   "PA_CL_OBJPRIM_ID_CNTL": {
@@ -19493,8 +19503,9 @@
     {"bits": [23, 23], "name": "VS_OUT_CCDIST1_VEC_ENA"},
     {"bits": [24, 24], "name": "VS_OUT_MISC_SIDE_BUS_ENA"},
     {"bits": [25, 25], "name": "USE_VTX_GS_CUT_FLAG"},
-    {"bits": [26, 26], "name": "USE_VTX_SHD_OBJPRIM_ID"},
-    {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"}
+    {"bits": [27, 27], "name": "USE_VTX_LINE_WIDTH"},
+    {"bits": [29, 29], "name": "BYPASS_VTX_RATE_COMBINER_GFX103"},
+    {"bits": [30, 30], "name": "BYPASS_PRIM_RATE_COMBINER_GFX103"}
    ]
   },
   "PA_CL_VTE_CNTL": {
@@ -19540,7 +19551,9 @@
     {"bits": [13, 16], "name": "MAX_SAMPLE_DIST"},
     {"bits": [20, 22], "name": "MSAA_EXPOSED_SAMPLES"},
     {"bits": [24, 25], "name": "DETAIL_TO_EXPOSED_MODE"},
-    {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"}
+    {"bits": [26, 27], "enum_ref": "CovToShaderSel", "name": "COVERAGE_TO_SHADER_SELECT"},
+    {"bits": [28, 28], "name": "SAMPLE_COVERAGE_ENCODING_GFX103"},
+    {"bits": [29, 29], "name": "COVERED_CENTROID_IS_CENTER_GFX103"}
    ]
   },
   "PA_SC_AA_MASK_X0Y0_X1Y0": {
@@ -21581,6 +21594,18 @@
     {"bits": [10, 19], "name": "PERFCOUNTER_SELECT3"}
    ]
   },
+  "SX_PS_DOWNCONVERT_CONTROL": {
+   "fields": [
+    {"bits": [0, 0], "name": "MRT0_FMT_MAPPING_DISABLE"},
+    {"bits": [1, 1], "name": "MRT1_FMT_MAPPING_DISABLE"},
+    {"bits": [2, 2], "name": "MRT2_FMT_MAPPING_DISABLE"},
+    {"bits": [3, 3], "name": "MRT3_FMT_MAPPING_DISABLE"},
+    {"bits": [4, 4], "name": "MRT4_FMT_MAPPING_DISABLE"},
+    {"bits": [5, 5], "name": "MRT5_FMT_MAPPING_DISABLE"},
+    {"bits": [6, 6], "name": "MRT6_FMT_MAPPING_DISABLE"},
+    {"bits": [7, 7], "name": "MRT7_FMT_MAPPING_DISABLE"}
+   ]
+  },
   "SX_PS_DOWNCONVERT": {
    "fields": [
     {"bits": [0, 3], "enum_ref": "SX_DOWNCONVERT_FORMAT", "name": "MRT0"},
@@ -21836,7 +21861,9 @@
   "VGT_HS_OFFCHIP_PARAM_UMD": {
    "fields": [
     {"bits": [0, 8], "name": "OFFCHIP_BUFFERING"},
-    {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"}
+    {"bits": [9, 10], "name": "OFFCHIP_GRANULARITY"},
+    {"bits": [0, 9], "name": "OFFCHIP_BUFFERING_GFX103"},
+    {"bits": [10, 11], "name": "OFFCHIP_GRANULARITY_GFX103"}
    ]
   },
   "VGT_INSTANCE_BASE_ID": {
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index d6b3fc85767..8825926064d 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -1438,6 +1438,7 @@ void si_init_perfcounters(struct si_screen *screen)
       num_blocks = ARRAY_SIZE(groups_gfx9);
       break;
    case GFX10:
+   case GFX10_3:
       blocks = groups_gfx10;
       num_blocks = ARRAY_SIZE(groups_gfx10);
       break;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 1d14442b445..7fdbfa24c57 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1088,7 +1088,11 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
    sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4;
 
-   if (sscreen->info.chip_class >= GFX7) {
+   if (sscreen->info.chip_class >= GFX10_3) {
+      sscreen->vgt_hs_offchip_param =
+            S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+            S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+   } else if (sscreen->info.chip_class >= GFX7) {
       if (sscreen->info.chip_class >= GFX8)
          --max_offchip_buffers;
       sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
@@ -1125,7 +1129,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    /* Only enable primitive binning on APUs by default. */
    if (sscreen->info.chip_class >= GFX10) {
       sscreen->dpbb_allowed = true;
-      sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
+      /* DFSM is not supported on GFX 10.3 and not beneficial on Navi1x. */
    } else if (sscreen->info.chip_class == GFX9) {
       sscreen->dpbb_allowed = !sscreen->info.has_dedicated_vram;
       sscreen->dfsm_allowed = !sscreen->info.has_dedicated_vram;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index ecce673caf2..b59f28e028d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -757,8 +757,9 @@ static void si_emit_clip_regs(struct si_context *sctx)
 
    unsigned initial_cdw = sctx->gfx_cs->current.cdw;
    unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
-                         S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask |
-                         (culldist_mask << 8);
+                         S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+                         S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
+                         clipdist_mask | (culldist_mask << 8);
 
    if (sctx->chip_class >= GFX10) {
       radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
@@ -1384,8 +1385,9 @@ static void si_emit_db_render_state(struct si_context *sctx)
    radeon_opt_set_context_reg(
       sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
       S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
-         S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
-         S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
+      S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
+      S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
+      S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0));
 
    db_shader_control = sctx->ps_db_shader_control;
 
@@ -3535,7 +3537,8 @@ static void si_emit_msaa_config(struct si_context *sctx)
       sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
       sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
                      S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
-                     S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);
+                     S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
+                     S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3);
 
       if (sctx->framebuffer.nr_samples > 1) {
          db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
@@ -5329,6 +5332,7 @@ static void si_init_config(struct si_context *sctx)
        * a single primitive shader subgroup.
        */
       si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+      /* Reuse for legacy (non-NGG) only. */
       si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
 
       if (!has_clear_state) {
@@ -5370,6 +5374,9 @@ static void si_init_config(struct si_context *sctx)
                      S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
       si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
    }
+   if (sctx->chip_class >= GFX10_3) {
+      si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+   }
 
    if (sctx->chip_class >= GFX9) {
       si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 0fd1714f8f8..520eeada9e9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1217,7 +1217,9 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
     * this.
     */
    shader->ctx_reg.ngg.pa_cl_ngg_cntl =
-      S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX);
+      S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX) |
+      /* Reuse for NGG. */
+      S_028838_VERTEX_REUSE_DEPTH_GFX103(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
    shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
 
    /* Oversubscribe PC. This improves performance when there are too many varyings. */



More information about the mesa-commit mailing list