[Mesa-dev] [PATCH 11/19] radeonsi: don't re-create shader PM4 states after scratch buffer update
Marek Olšák
maraeo at gmail.com
Sun Oct 2 21:09:26 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_pm4.c | 9 +++++++-
src/gallium/drivers/radeonsi/si_pm4.h | 1 +
src/gallium/drivers/radeonsi/si_state_shaders.c | 30 +++++++++++++------------
3 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index c3032fc..386d093 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -96,25 +96,32 @@ void si_pm4_add_bo(struct si_pm4_state *state,
enum radeon_bo_priority priority)
{
unsigned idx = state->nbo++;
assert(idx < SI_PM4_MAX_BO);
r600_resource_reference(&state->bo[idx], bo);
state->bo_usage[idx] = usage;
state->bo_priority[idx] = priority;
}
-void si_pm4_free_state_simple(struct si_pm4_state *state)
+void si_pm4_clear_state(struct si_pm4_state *state)
{
for (int i = 0; i < state->nbo; ++i)
r600_resource_reference(&state->bo[i], NULL);
r600_resource_reference(&state->indirect_buffer, NULL);
+ state->nbo = 0;
+ state->ndw = 0;
+}
+
+void si_pm4_free_state_simple(struct si_pm4_state *state)
+{
+ si_pm4_clear_state(state);
FREE(state);
}
void si_pm4_free_state(struct si_context *sctx,
struct si_pm4_state *state,
unsigned idx)
{
if (!state)
return;
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 35fa6c3..9b02a80 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -64,20 +64,21 @@ void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate);
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
void si_pm4_add_bo(struct si_pm4_state *state,
struct r600_resource *bo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority);
void si_pm4_upload_indirect_buffer(struct si_context *sctx,
struct si_pm4_state *state);
+void si_pm4_clear_state(struct si_pm4_state *state);
void si_pm4_free_state_simple(struct si_pm4_state *state);
void si_pm4_free_state(struct si_context *sctx,
struct si_pm4_state *state,
unsigned idx);
void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
void si_pm4_emit_dirty(struct si_context *sctx);
void si_pm4_reset_emitted(struct si_context *sctx);
#endif
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 816aadc..acbceba 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -322,27 +322,37 @@ static void si_set_tesseval_regs(struct si_screen *sscreen,
} else
distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST;
si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
S_028B6C_TYPE(type) |
S_028B6C_PARTITIONING(partitioning) |
S_028B6C_TOPOLOGY(topology) |
S_028B6C_DISTRIBUTION_MODE(distribution_mode));
}
+static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
+{
+ if (shader->pm4)
+ si_pm4_clear_state(shader->pm4);
+ else
+ shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+
+ return shader->pm4;
+}
+
static void si_shader_ls(struct si_shader *shader)
{
struct si_pm4_state *pm4;
unsigned vgpr_comp_cnt;
uint64_t va;
- pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+ pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
/* We need at least 2 components for LS.
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
@@ -356,21 +366,21 @@ static void si_shader_ls(struct si_shader *shader)
S_00B528_FLOAT_MODE(shader->config.float_mode);
shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
static void si_shader_hs(struct si_shader *shader)
{
struct si_pm4_state *pm4;
uint64_t va;
- pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+ pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
@@ -384,22 +394,21 @@ static void si_shader_hs(struct si_shader *shader)
}
static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
{
struct si_pm4_state *pm4;
unsigned num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
unsigned oc_lds_en;
- pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
-
+ pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
if (shader->selector->type == PIPE_SHADER_VERTEX) {
vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
num_user_sgprs = SI_ES_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
@@ -460,22 +469,21 @@ static void si_shader_gs(struct si_shader *shader)
unsigned gs_vert_itemsize = shader->selector->gsvs_vertex_size;
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
struct si_pm4_state *pm4;
uint64_t va;
unsigned max_stream = shader->selector->max_gs_stream;
/* The GSVS_RING_ITEMSIZE register takes 15 bits */
assert(gsvs_itemsize < (1 << 15));
- pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
-
+ pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(shader));
si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
@@ -518,22 +526,21 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
{
struct si_pm4_state *pm4;
unsigned num_user_sgprs;
unsigned nparams, vgpr_comp_cnt;
uint64_t va;
unsigned oc_lds_en;
unsigned window_space =
shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
bool enable_prim_id = si_vs_exports_prim_id(shader);
- pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
-
+ pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
/* We always write VGT_GS_MODE in the VS state, because every switch
* between different shader pipelines involving a different GS or no
* GS at all involves a switch of the VS (different GS use different
* copy shaders). On the other hand, when the API switches from a GS to
* no GS and then back to the same GS used originally, the GS state is
* not sent again.
*/
@@ -682,22 +689,21 @@ static void si_shader_ps(struct si_shader *shader)
/* we need to enable at least one of them, otherwise we hang the GPU */
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
G_0286CC_PERSP_CENTER_ENA(input_ena) ||
G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
- pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
-
+ pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
/* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
* Possible vaules:
* 0 -> Position = pixel center
* 1 -> Position = pixel centroid
* 2 -> Position = at sample position
*
* From GLSL 4.5 specification, section 7.1:
@@ -784,24 +790,20 @@ static void si_shader_ps(struct si_shader *shader)
else if (info->num_memory_instructions >= 2 ||
shader->binary.code_size > 100*4)
shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z;
else
shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
}
static void si_shader_init_pm4_state(struct si_screen *sscreen,
struct si_shader *shader)
{
-
- if (shader->pm4)
- si_pm4_free_state_simple(shader->pm4);
-
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
if (shader->key.vs.as_ls)
si_shader_ls(shader);
else if (shader->key.vs.as_es)
si_shader_es(sscreen, shader);
else
si_shader_vs(sscreen, shader, NULL);
break;
case PIPE_SHADER_TESS_CTRL:
--
2.7.4
More information about the mesa-dev mailing list