[Mesa-dev] [PATCH 3/8] radv/gfx10: launch 2 compute waves per CU before going on to the next CU

Samuel Pitoiset samuel.pitoiset at gmail.com
Fri Jul 12 10:17:13 UTC 2019


Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/vulkan/radv_pipeline.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index efb94cdcd23..a58b0d6d006 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -4378,6 +4378,7 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *compute_shader;
 	struct radv_device *device = pipeline->device;
+	unsigned threads_per_threadgroup;
 	unsigned threadgroups_per_cu = 1;
 	unsigned waves_per_threadgroup;
 	unsigned max_waves_per_sh = 0;
@@ -4402,10 +4403,14 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
 			  S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
 
 	/* Calculate best compute resource limits. */
-	waves_per_threadgroup =
-		DIV_ROUND_UP(compute_shader->info.cs.block_size[0] *
-			     compute_shader->info.cs.block_size[1] *
-			     compute_shader->info.cs.block_size[2], 64);
+	threads_per_threadgroup = compute_shader->info.cs.block_size[0] *
+				  compute_shader->info.cs.block_size[1] *
+				  compute_shader->info.cs.block_size[2];
+	waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, 64);
+
+	if (device->physical_device->rad_info.chip_class >= GFX10 &&
+	    waves_per_threadgroup == 1)
+		threadgroups_per_cu = 2;
 
 	radeon_set_sh_reg(&pipeline->cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
 			  ac_get_compute_resource_limits(&device->physical_device->rad_info,
-- 
2.22.0



More information about the mesa-dev mailing list