[Mesa-dev] [PATCH 01/13] radeonsi: move TCS_OUT_LAYOUT.PatchVerticesIn to lower bits
Marek Olšák
maraeo at gmail.com
Sat Feb 17 19:43:16 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
For a later patch.
---
src/gallium/drivers/radeonsi/si_shader.c | 2 +-
src/gallium/drivers/radeonsi/si_shader_internal.h | 2 +-
src/gallium/drivers/radeonsi/si_state_draw.c | 7 ++++---
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1f2338a..cc57ba3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2005,21 +2005,21 @@ static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi,
}
return load_tess_level(ctx, semantic_name);
}
static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
if (ctx->type == PIPE_SHADER_TESS_CTRL)
- return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
+ return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 6);
else if (ctx->type == PIPE_SHADER_TESS_EVAL)
return get_num_tcs_out_vertices(ctx);
else
unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
}
void si_load_system_value(struct si_shader_context *ctx,
unsigned index,
const struct tgsi_full_declaration *decl)
{
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 571df55..a9883d5 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -155,21 +155,21 @@ struct si_shader_context {
/* API TCS */
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
* [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
* [16:31] = TCS output patch0 offset for per-patch / 16
* max = (NUM_PATCHES + 1) * 32*32
*/
int param_tcs_out_lds_offsets;
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
* max = 32*32*4 + 32*4
- * [26:31] = gl_PatchVerticesIn, max = 32
+ * [13:18] = gl_PatchVerticesIn, max = 32
*/
int param_tcs_out_lds_layout;
int param_tcs_offchip_addr_base64k;
int param_tcs_factor_addr_base64k;
int param_tcs_offchip_offset;
int param_tcs_factor_offset;
/* API TES */
int param_tes_u;
int param_tes_v;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 06ef84d..b245a38 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -226,21 +226,22 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
assert(((output_vertex_size / 4) & ~0xff) == 0);
assert(((input_patch_size / 4) & ~0x1fff) == 0);
assert(((output_patch_size / 4) & ~0x1fff) == 0);
assert(((output_patch0_offset / 16) & ~0xffff) == 0);
assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
assert(num_tcs_input_cp <= 32);
assert(num_tcs_output_cp <= 32);
tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
- tcs_out_layout = output_patch_size / 4;
+ tcs_out_layout = (output_patch_size / 4) |
+ (num_tcs_input_cp << 13);
tcs_out_offsets = (output_patch0_offset / 16) |
((perpatch_output_offset / 16) << 16);
offchip_layout = *num_patches |
(num_tcs_output_cp << 6) |
(pervertex_output_patch_size * *num_patches << 12);
/* Compute the LDS size. */
lds_size = output_patch0_offset + output_patch_size * *num_patches;
if (sctx->b.chip_class >= CIK) {
@@ -261,41 +262,41 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
S_00B42C_LDS_SIZE(lds_size);
radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
/* Set userdata SGPRs for merged LS-HS. */
radeon_set_sh_reg_seq(cs,
R_00B430_SPI_SHADER_USER_DATA_LS_0 +
GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3);
radeon_emit(cs, offchip_layout);
radeon_emit(cs, tcs_out_offsets);
- radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+ radeon_emit(cs, tcs_out_layout);
} else {
unsigned ls_rsrc2 = ls_current->config.rsrc2;
si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
/* Due to a hw bug, RSRC2_LS must be written twice with another
* LS register written in between. */
if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
radeon_emit(cs, ls_current->config.rsrc1);
radeon_emit(cs, ls_rsrc2);
/* Set userdata SGPRs for TCS. */
radeon_set_sh_reg_seq(cs,
R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
radeon_emit(cs, offchip_layout);
radeon_emit(cs, tcs_out_offsets);
- radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+ radeon_emit(cs, tcs_out_layout);
radeon_emit(cs, tcs_in_layout);
}
/* Set userdata SGPRs for TES. */
radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2);
radeon_emit(cs, offchip_layout);
radeon_emit(cs, r600_resource(sctx->tess_offchip_ring)->gpu_address >> 16);
ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) |
S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
--
2.7.4
More information about the mesa-dev
mailing list