[Mesa-dev] [PATCH 4/6] radeonsi: optimizing SET_CONTEXT_REG for shaders PS
Sonny Jiang
sonny.jiang at amd.com
Tue Sep 18 20:21:13 UTC 2018
Signed-off-by: Sonny Jiang <sonny.jiang at amd.com>
---
src/gallium/drivers/radeonsi/si_gfx_cs.c | 7 +
src/gallium/drivers/radeonsi/si_state.h | 11 ++
.../drivers/radeonsi/si_state_shaders.c | 144 ++++++++++--------
3 files changed, 98 insertions(+), 64 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 2e10d766a6..8c1bee8ed6 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -371,6 +371,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VTE_CNTL] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_SPI_BARYC_CNTL] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_IN_CONTROL] = 0x00000002;
+ ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff;
/* Set all saved registers state to saved. */
ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index bf1ae9f18f..878b67f0ed 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -302,6 +302,17 @@ enum si_tracked_reg {
SI_TRACKED_SPI_SHADER_POS_FORMAT,
SI_TRACKED_PA_CL_VTE_CNTL,
+ SI_TRACKED_SPI_PS_INPUT_ENA, /* 2 consecutive registers */
+ SI_TRACKED_SPI_PS_INPUT_ADDR,
+
+ SI_TRACKED_SPI_BARYC_CNTL,
+ SI_TRACKED_SPI_PS_IN_CONTROL,
+
+ SI_TRACKED_SPI_SHADER_Z_FORMAT, /* 2 consecutive registers */
+ SI_TRACKED_SPI_SHADER_COL_FORMAT,
+
+ SI_TRACKED_CB_SHADER_MASK,
+
SI_NUM_TRACKED_REGS,
};
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 332fdae3b3..e5732f7920 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1100,12 +1100,88 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
return value;
}
-static void si_shader_ps(struct si_shader *shader)
+static void si_emit_shader_ps(struct si_context *sctx)
{
+ struct si_shader *shader = sctx->queued.named.ps->shader;
+ if (!shader)
+ return;
+ /* Write the queued PS's context registers via the radeon_opt_set_context_reg* helpers. */
struct tgsi_shader_info *info = &shader->selector->info;
- struct si_pm4_state *pm4;
- unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
+ unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
+
+ /* R_0286CC_SPI_PS_INPUT_ENA, R_0286D0_SPI_PS_INPUT_ADDR */
+ radeon_opt_set_context_reg2(sctx, R_0286CC_SPI_PS_INPUT_ENA,
+ SI_TRACKED_SPI_PS_INPUT_ENA,
+ shader->config.spi_ps_input_ena,
+ shader->config.spi_ps_input_addr);
+
+ /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
+ * Possible values:
+ * 0 -> Position = pixel center
+ * 1 -> Position = pixel centroid
+ * 2 -> Position = at sample position
+ *
+ * From GLSL 4.5 specification, section 7.1:
+ * "The variable gl_FragCoord is available as an input variable from
+ * within fragment shaders and it holds the window relative coordinates
+ * (x, y, z, 1/w) values for the fragment. If multi-sampling, this
+ * value can be for any location within the pixel, or one of the
+ * fragment samples. The use of centroid does not further restrict
+ * this value to be inside the current primitive."
+ *
+ * Meaning that centroid has no effect and we can return anything within
+ * the pixel. Thus, return the value at sample position, because that's
+ * the most accurate one shaders can get.
+ */
+ spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
+
+ if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
+ TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
+ spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
+
+ /* Set interpolation controls. */
+ spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader));
+
+ radeon_opt_set_context_reg(sctx, R_0286E0_SPI_BARYC_CNTL,
+ SI_TRACKED_SPI_BARYC_CNTL, spi_baryc_cntl);
+ radeon_opt_set_context_reg(sctx, R_0286D8_SPI_PS_IN_CONTROL,
+ SI_TRACKED_SPI_PS_IN_CONTROL,
+ spi_ps_in_control);
+
+ spi_shader_col_format = si_get_spi_shader_col_format(shader);
+ cb_shader_mask = ac_get_cb_shader_mask(spi_shader_col_format);
+
+ /* Ensure that some export memory is always allocated, for two reasons:
+ *
+ * 1) Correctness: The hardware ignores the EXEC mask if no export
+ * memory is allocated, so KILL and alpha test do not work correctly
+ * without this.
+ * 2) Performance: Every shader needs at least a NULL export, even when
+ * it writes no color/depth output. The NULL export instruction
+ * stalls without this setting.
+ *
+ * Don't add this to CB_SHADER_MASK.
+ */
+ if (!spi_shader_col_format &&
+ !info->writes_z && !info->writes_stencil && !info->writes_samplemask)
+ spi_shader_col_format = V_028714_SPI_SHADER_32_R;
+
+ /* R_028710_SPI_SHADER_Z_FORMAT, R_028714_SPI_SHADER_COL_FORMAT */
+ radeon_opt_set_context_reg2(sctx, R_028710_SPI_SHADER_Z_FORMAT,
+ SI_TRACKED_SPI_SHADER_Z_FORMAT,
+ ac_get_spi_shader_z_format(info->writes_z,
+ info->writes_stencil,
+ info->writes_samplemask),
+ spi_shader_col_format);
+
+ radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK,
+ SI_TRACKED_CB_SHADER_MASK, cb_shader_mask);
+}
+
+static void si_shader_ps(struct si_shader *shader)
+{
+ struct si_pm4_state *pm4;
uint64_t va;
unsigned input_ena = shader->config.spi_ps_input_ena;
@@ -1157,67 +1233,7 @@ static void si_shader_ps(struct si_shader *shader)
if (!pm4)
return;
- /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
- * Possible vaules:
- * 0 -> Position = pixel center
- * 1 -> Position = pixel centroid
- * 2 -> Position = at sample position
- *
- * From GLSL 4.5 specification, section 7.1:
- * "The variable gl_FragCoord is available as an input variable from
- * within fragment shaders and it holds the window relative coordinates
- * (x, y, z, 1/w) values for the fragment. If multi-sampling, this
- * value can be for any location within the pixel, or one of the
- * fragment samples. The use of centroid does not further restrict
- * this value to be inside the current primitive."
- *
- * Meaning that centroid has no effect and we can return anything within
- * the pixel. Thus, return the value at sample position, because that's
- * the most accurate one shaders can get.
- */
- spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
-
- if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
- TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
- spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
-
- spi_shader_col_format = si_get_spi_shader_col_format(shader);
- cb_shader_mask = ac_get_cb_shader_mask(spi_shader_col_format);
-
- /* Ensure that some export memory is always allocated, for two reasons:
- *
- * 1) Correctness: The hardware ignores the EXEC mask if no export
- * memory is allocated, so KILL and alpha test do not work correctly
- * without this.
- * 2) Performance: Every shader needs at least a NULL export, even when
- * it writes no color/depth output. The NULL export instruction
- * stalls without this setting.
- *
- * Don't add this to CB_SHADER_MASK.
- */
- if (!spi_shader_col_format &&
- !info->writes_z && !info->writes_stencil && !info->writes_samplemask)
- spi_shader_col_format = V_028714_SPI_SHADER_32_R;
-
- si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
- si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR,
- shader->config.spi_ps_input_addr);
-
- /* Set interpolation controls. */
- spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader));
-
- /* Set registers. */
- si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
- si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
-
- si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
- ac_get_spi_shader_z_format(info->writes_z,
- info->writes_stencil,
- info->writes_samplemask));
-
- si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format);
- si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);
-
+ pm4->atom.emit = si_emit_shader_ps;
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
--
2.17.1
More information about the mesa-dev
mailing list