Mesa (main): radeonsi: port tess ring calcs to the common helper.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed May 11 02:50:36 UTC 2022
Module: Mesa
Branch: main
Commit: 14b1ed1ce105d42652f70e2fd13c90fc4f2e7ffc
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=14b1ed1ce105d42652f70e2fd13c90fc4f2e7ffc
Author: Dave Airlie <airlied at redhat.com>
Date: Tue May 10 11:47:33 2022 +1000
radeonsi: port tess ring calcs to the common helper.
This uses the common helper code to implement the tess ring sizing.
One open question is whether radeonsi should be using tess_offchip_ring_offset
in some of the places where it's currently using tess_factor_ring_size.
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16415>
---
src/gallium/drivers/radeonsi/si_pipe.c | 55 +---------------------
src/gallium/drivers/radeonsi/si_pipe.h | 5 +-
src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 2 +-
src/gallium/drivers/radeonsi/si_state_draw.cpp | 2 +-
src/gallium/drivers/radeonsi/si_state_shaders.cpp | 18 +++----
5 files changed, 13 insertions(+), 69 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3c94693af53..0c46ef6b2e0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1215,60 +1215,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->max_memory_usage_kb = sscreen->info.vram_size_kb + sscreen->info.gart_size_kb / 4 * 3;
- /* Determine tessellation ring info. */
- bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
- sscreen->info.family != CHIP_CARRIZO &&
- sscreen->info.family != CHIP_STONEY;
- /* This must be one less than the maximum number due to a hw limitation.
- * Various hardware bugs need this.
- */
- unsigned max_offchip_buffers_per_se;
-
- if (sscreen->info.chip_class >= GFX11)
- max_offchip_buffers_per_se = 256; /* TODO: we could decrease this to reduce memory/cache usage */
- else if (sscreen->info.chip_class >= GFX10)
- max_offchip_buffers_per_se = 128;
- /* Only certain chips can use the maximum value. */
- else if (sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_VEGA20)
- max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
- else
- max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;
-
- unsigned max_offchip_buffers = max_offchip_buffers_per_se * sscreen->info.max_se;
- unsigned offchip_granularity;
-
- /* Hawaii has a bug with offchip buffers > 256 that can be worked
- * around by setting 4K granularity.
- */
- if (sscreen->info.family == CHIP_HAWAII) {
- sscreen->tess_offchip_block_dw_size = 4096;
- offchip_granularity = V_03093C_X_4K_DWORDS;
- } else {
- sscreen->tess_offchip_block_dw_size = 8192;
- offchip_granularity = V_03093C_X_8K_DWORDS;
- }
-
- sscreen->tess_factor_ring_size = 48 * 1024 * sscreen->info.max_se;
- sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4;
-
- if (sscreen->info.chip_class >= GFX11) {
- /* OFFCHIP_BUFFERING is per SE. */
- sscreen->vgt_hs_offchip_param =
- S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers_per_se - 1) |
- S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
- } else if (sscreen->info.chip_class >= GFX10_3) {
- sscreen->vgt_hs_offchip_param =
- S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
- S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
- } else if (sscreen->info.chip_class >= GFX7) {
- if (sscreen->info.chip_class >= GFX8)
- --max_offchip_buffers;
- sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
- S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
- } else {
- assert(offchip_granularity == V_03093C_X_8K_DWORDS);
- sscreen->vgt_hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
- }
+ ac_get_hs_info(&sscreen->info, &sscreen->hs);
sscreen->has_draw_indirect_multi =
(sscreen->info.family >= CHIP_POLARIS10) ||
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ebbee0b8aac..8b5b82dd6cc 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -572,10 +572,7 @@ struct si_screen {
unsigned pa_sc_raster_config_1;
unsigned se_tile_repeat;
unsigned gs_table_depth;
- unsigned tess_offchip_block_dw_size;
- unsigned tess_offchip_ring_size;
- unsigned tess_factor_ring_size;
- unsigned vgt_hs_offchip_param;
+ struct ac_hs_info hs;
unsigned eqaa_force_coverage_samples;
unsigned eqaa_force_z_samples;
unsigned eqaa_force_color_samples;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index 5950e1d1d01..c61bee867d2 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -350,7 +350,7 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum
}
if (ring == TCS_FACTOR_RING) {
- unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
+ unsigned tf_offset = ctx->screen->hs.tess_offchip_ring_size;
addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
}
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index eda67cd336b..cf053b4d2e2 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -720,7 +720,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
/* Make sure the output data fits in the offchip buffer */
*num_patches =
- MIN2(*num_patches, (sctx->screen->tess_offchip_block_dw_size * 4) / output_patch_size);
+ MIN2(*num_patches, (sctx->screen->hs.tess_offchip_block_dw_size * 4) / output_patch_size);
/* Make sure that the data fits in LDS. This assumes the shaders only
* use LDS for the inputs and outputs.
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 785373d310c..33bb4923157 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -4022,7 +4022,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
*/
sctx->tess_rings = pipe_aligned_buffer_create(
sctx->b.screen, SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT,
- sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 2 * 1024 * 1024);
+ sctx->screen->hs.tess_offchip_ring_size + sctx->screen->hs.tess_factor_ring_size, 2 * 1024 * 1024);
if (!sctx->tess_rings)
return;
@@ -4031,13 +4031,13 @@ void si_init_tess_factor_ring(struct si_context *sctx)
sctx->b.screen,
PIPE_RESOURCE_FLAG_ENCRYPTED | SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
- sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 2 * 1024 * 1024);
+ sctx->screen->hs.tess_offchip_ring_size + sctx->screen->hs.tess_factor_ring_size, 2 * 1024 * 1024);
}
uint64_t factor_va =
- si_resource(sctx->tess_rings)->gpu_address + sctx->screen->tess_offchip_ring_size;
+ si_resource(sctx->tess_rings)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
- unsigned tf_ring_size_field = sctx->screen->tess_factor_ring_size / 4;
+ unsigned tf_ring_size_field = sctx->screen->hs.tess_factor_ring_size / 4;
if (sctx->chip_class >= GFX11)
tf_ring_size_field /= sctx->screen->info.max_se;
@@ -4067,7 +4067,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
S_030944_BASE_HI(factor_va >> 40));
}
radeon_set_uconfig_reg(R_03093C_VGT_HS_OFFCHIP_PARAM,
- sctx->screen->vgt_hs_offchip_param);
+ sctx->screen->hs.hs_offchip_param);
radeon_end();
return;
}
@@ -4087,7 +4087,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
si_pm4_set_reg(sctx->cs_preamble_state, R_030944_VGT_TF_MEMORY_BASE_HI,
S_030944_BASE_HI(factor_va >> 40));
si_pm4_set_reg(sctx->cs_preamble_state, R_03093C_VGT_HS_OFFCHIP_PARAM,
- sctx->screen->vgt_hs_offchip_param);
+ sctx->screen->hs.hs_offchip_param);
} else {
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -4095,18 +4095,18 @@ void si_init_tess_factor_ring(struct si_context *sctx)
S_008988_SIZE(tf_ring_size_field));
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- sctx->screen->vgt_hs_offchip_param);
+ sctx->screen->hs.hs_offchip_param);
sctx->cs_preamble_tess_rings = pm4;
if (sctx->screen->info.has_tmz_support) {
pm4 = CALLOC_STRUCT(si_pm4_state);
uint64_t factor_va_tmz =
- si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->tess_offchip_ring_size;
+ si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE,
S_008988_SIZE(tf_ring_size_field));
si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va_tmz >> 8);
si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- sctx->screen->vgt_hs_offchip_param);
+ sctx->screen->hs.hs_offchip_param);
sctx->cs_preamble_tess_rings_tmz = pm4;
}
}
More information about the mesa-commit
mailing list