[Mesa-dev] [PATCH 02/13] radeonsi: move tessellation ring info into si_screen
Marek Olšák
maraeo at gmail.com
Sat Feb 17 19:43:17 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_pipe.c | 38 ++++++++++++++++-
src/gallium/drivers/radeonsi/si_pipe.h | 3 ++
src/gallium/drivers/radeonsi/si_state_shaders.c | 56 ++++++-------------------
3 files changed, 52 insertions(+), 45 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index f07ec50..83133cb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -742,25 +742,59 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
si_destroy_shader_cache(sscreen);
FREE(sscreen);
return NULL;
}
si_handle_env_var_force_family(sscreen);
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
si_init_perfcounters(sscreen);
+ /* Determine tessellation ring info. */
+ bool double_offchip_buffers = sscreen->info.chip_class >= CIK &&
+ sscreen->info.family != CHIP_CARRIZO &&
+ sscreen->info.family != CHIP_STONEY;
+ /* This must be one less than the maximum number due to a hw limitation.
+ * Various hardware bugs in SI, CIK, and GFX9 need this.
+ */
+ unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;
+ unsigned max_offchip_buffers = max_offchip_buffers_per_se *
+ sscreen->info.max_se;
+ unsigned offchip_granularity;
+
/* Hawaii has a bug with offchip buffers > 256 that can be worked
* around by setting 4K granularity.
*/
- sscreen->tess_offchip_block_dw_size =
- sscreen->info.family == CHIP_HAWAII ? 4096 : 8192;
+ if (sscreen->info.family == CHIP_HAWAII) {
+ sscreen->tess_offchip_block_dw_size = 4096;
+ offchip_granularity = V_03093C_X_4K_DWORDS;
+ } else {
+ sscreen->tess_offchip_block_dw_size = 8192;
+ offchip_granularity = V_03093C_X_8K_DWORDS;
+ }
+
+ sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
+ assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0);
+ sscreen->tess_offchip_ring_size = max_offchip_buffers *
+ sscreen->tess_offchip_block_dw_size * 4;
+
+ if (sscreen->info.chip_class >= CIK) {
+ if (sscreen->info.chip_class >= VI)
+ --max_offchip_buffers;
+ sscreen->vgt_hs_offchip_param =
+ S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
+ S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
+ } else {
+ assert(offchip_granularity == V_03093C_X_8K_DWORDS);
+ sscreen->vgt_hs_offchip_param =
+ S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
+ }
/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
* on SI. */
sscreen->has_clear_state = sscreen->info.chip_class >= CIK;
sscreen->has_distributed_tess =
sscreen->info.chip_class >= VI &&
sscreen->info.max_se >= 2;
sscreen->has_draw_indirect_multi =
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 3a959f9..7b23e8c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -97,20 +97,23 @@ struct si_screen {
struct pipe_screen b;
struct radeon_winsys *ws;
struct disk_cache *disk_shader_cache;
struct radeon_info info;
uint64_t debug_flags;
char renderer_string[100];
unsigned gs_table_depth;
unsigned tess_offchip_block_dw_size;
+ unsigned tess_offchip_ring_size;
+ unsigned tess_factor_ring_size;
+ unsigned vgt_hs_offchip_param;
bool has_clear_state;
bool has_distributed_tess;
bool has_draw_indirect_multi;
bool has_out_of_order_rast;
bool assume_no_z_fights;
bool commutative_blend_add;
bool clear_db_cache_before_clear;
bool has_msaa_sample_loc_bug;
bool has_ls_vgpr_init_bug;
bool dpbb_allowed;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 2cd48f5..9c505ff 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2945,102 +2945,72 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
if (spi_tmpring_size != sctx->spi_tmpring_size) {
sctx->spi_tmpring_size = spi_tmpring_size;
si_mark_atom_dirty(sctx, &sctx->scratch_state);
}
return true;
}
static void si_init_tess_factor_ring(struct si_context *sctx)
{
- bool double_offchip_buffers = sctx->b.chip_class >= CIK &&
- sctx->b.family != CHIP_CARRIZO &&
- sctx->b.family != CHIP_STONEY;
- /* This must be one less than the maximum number due to a hw limitation.
- * Various hardware bugs in SI, CIK, and GFX9 need this.
- */
- unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;
- unsigned max_offchip_buffers = max_offchip_buffers_per_se *
- sctx->screen->info.max_se;
- unsigned offchip_granularity;
-
- switch (sctx->screen->tess_offchip_block_dw_size) {
- default:
- assert(0);
- /* fall through */
- case 8192:
- offchip_granularity = V_03093C_X_8K_DWORDS;
- break;
- case 4096:
- offchip_granularity = V_03093C_X_4K_DWORDS;
- break;
- }
-
assert(!sctx->tf_ring);
+
/* Use 64K alignment for both rings, so that we can pass the address
* to shaders as one SGPR containing bits [16:47].
*/
sctx->tf_ring = si_aligned_buffer_create(sctx->b.b.screen,
- R600_RESOURCE_FLAG_UNMAPPABLE,
- PIPE_USAGE_DEFAULT,
- 32768 * sctx->screen->info.max_se,
- 64 * 1024);
+ R600_RESOURCE_FLAG_UNMAPPABLE,
+ PIPE_USAGE_DEFAULT,
+ sctx->screen->tess_factor_ring_size,
+ 64 * 1024);
if (!sctx->tf_ring)
return;
- assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
-
sctx->tess_offchip_ring =
si_aligned_buffer_create(sctx->b.b.screen,
- R600_RESOURCE_FLAG_UNMAPPABLE,
- PIPE_USAGE_DEFAULT,
- max_offchip_buffers *
- sctx->screen->tess_offchip_block_dw_size * 4,
- 64 * 1024);
+ R600_RESOURCE_FLAG_UNMAPPABLE,
+ PIPE_USAGE_DEFAULT,
+ sctx->screen->tess_offchip_ring_size,
+ 64 * 1024);
if (!sctx->tess_offchip_ring)
return;
si_init_config_add_vgt_flush(sctx);
uint64_t offchip_va = r600_resource(sctx->tess_offchip_ring)->gpu_address;
uint64_t factor_va = r600_resource(sctx->tf_ring)->gpu_address;
assert((offchip_va & 0xffff) == 0);
assert((factor_va & 0xffff) == 0);
si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_offchip_ring),
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tf_ring),
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
/* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
- if (sctx->b.chip_class >= VI)
- --max_offchip_buffers;
-
si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
- S_030938_SIZE(sctx->tf_ring->width0 / 4));
+ S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
factor_va >> 8);
if (sctx->b.chip_class >= GFX9)
si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI,
factor_va >> 40);
si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM,
- S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
- S_03093C_OFFCHIP_GRANULARITY(offchip_granularity));
+ sctx->screen->vgt_hs_offchip_param);
} else {
- assert(offchip_granularity == V_03093C_X_8K_DWORDS);
si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
- S_008988_SIZE(sctx->tf_ring->width0 / 4));
+ S_008988_SIZE(sctx->screen->tess_factor_ring_size / 4));
si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
factor_va >> 8);
si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers));
+ sctx->screen->vgt_hs_offchip_param);
}
if (sctx->b.chip_class >= GFX9) {
si_pm4_set_reg(sctx->init_config,
R_00B430_SPI_SHADER_USER_DATA_LS_0 +
GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K * 4,
offchip_va >> 16);
si_pm4_set_reg(sctx->init_config,
R_00B430_SPI_SHADER_USER_DATA_LS_0 +
GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K * 4,
--
2.7.4
More information about the mesa-dev
mailing list