[Mesa-dev] [PATCH 4/4] radv: use CLEAR_STATE for initializing some registers

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Oct 11 17:15:19 UTC 2017


Based on RadeonSI.

This improves some Vulkan demos by +1% to +3%.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/vulkan/si_cmd_buffer.c | 99 +++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 626b68ad8e..1e8b43d4fa 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -332,15 +332,25 @@ si_emit_config(struct radv_physical_device *physical_device,
 {
 	int i;
 
+	/* Only SI can disable CLEAR_STATE for now. */
+	assert(physical_device->has_clear_state ||
+	       physical_device->rad_info.chip_class == SI);
+
 	radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
 	radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
 	radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));
 
+	if (physical_device->has_clear_state) {
+		radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 1, 0));
+		radeon_emit(cs, 0);
+	}
+
 	if (physical_device->rad_info.chip_class <= VI)
 		si_set_raster_config(physical_device, cs);
 
 	radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
-	radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
+	if (!physical_device->has_clear_state)
+		radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
 
 	/* FIXME calculate these values somehow ??? */
 	if (physical_device->rad_info.chip_class <= VI) {
@@ -348,16 +358,15 @@ si_emit_config(struct radv_physical_device *physical_device,
 		radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
 	}
 
-	radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
-
-	radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
-	radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+	if (!physical_device->has_clear_state) {
+		radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
+		radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
+		radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+	}
 
-	radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
 	radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
-	if (physical_device->rad_info.chip_class >= GFX9)
-		radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 0);
-	radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
+	if (!physical_device->has_clear_state)
+		radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
 	if (physical_device->rad_info.chip_class < CIK)
 		radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
 				      S_008A14_CLIP_VTX_REORDER_ENA(1));
@@ -365,30 +374,43 @@ si_emit_config(struct radv_physical_device *physical_device,
 	radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
 	radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
 
-	radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
+	if (!physical_device->has_clear_state)
+		radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
 
-	for (i = 0; i < 16; i++) {
-		radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
-		radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
+	/* CLEAR_STATE doesn't clear these correctly on certain generations.
+	 * I don't know why. Deduced by trial and error.
+	 */
+	if (physical_device->rad_info.chip_class <= CIK) {
+		radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+		radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
+				       S_028204_WINDOW_OFFSET_DISABLE(1));
+		radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL,
+				       S_028240_WINDOW_OFFSET_DISABLE(1));
+		radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
+				       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
+		radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
+		radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
+				       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
+	}
+
+	if (!physical_device->has_clear_state) {
+		for (i = 0; i < 16; i++) {
+			radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
+			radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
+		}
+	}
+
+	if (!physical_device->has_clear_state) {
+		radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
+		radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
+		/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
+		radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+		radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
+		radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
+		radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
+		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
 	}
 
-	radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
-	radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
-	radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
-			       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
-	radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
-	radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
-			       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
-
-	radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
-	radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
-	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
-	radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
-	radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
-
-	radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
-	radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
-	radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
 	radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
 			       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
 			       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
@@ -398,6 +420,11 @@ si_emit_config(struct radv_physical_device *physical_device,
 		radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
 		radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
 	} else {
+		/* These registers, when written, also overwrite the
+		 * CLEAR_STATE context, so we can't rely on CLEAR_STATE setting
+		 * them.  It would be an issue if there was another UMD
+		 * changing them.
+		 */
 		radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
 		radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
 		radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
@@ -448,9 +475,6 @@ si_emit_config(struct radv_physical_device *physical_device,
 		radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL,
 				       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
 				       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
-		if (physical_device->rad_info.family < CHIP_POLARIS10)
-			radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
-		radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
 
 		vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
 			S_028B50_ACCUM_TRI(11) |
@@ -463,14 +487,11 @@ si_emit_config(struct radv_physical_device *physical_device,
 
 		radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
 				       vgt_tess_distribution);
-	} else {
+	} else if (!physical_device->has_clear_state) {
 		radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
 		radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
 	}
 
-	if (physical_device->has_rbplus)
-		radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);
-
 	if (physical_device->rad_info.chip_class >= GFX9) {
 		unsigned num_se = physical_device->rad_info.max_se;
 		unsigned pc_lines = 0;
@@ -488,10 +509,6 @@ si_emit_config(struct radv_physical_device *physical_device,
 
 		radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
 				       S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
-		radeon_set_context_reg(cs, R_028064_DB_RENDER_FILTER, 0);
-		/* TODO: We can use this to disable RBs for rendering to GART: */
-		radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0);
-		radeon_set_context_reg(cs, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
 		/* TODO: Enable the binner: */
 		radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
 				       S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
-- 
2.14.2



More information about the mesa-dev mailing list