<div dir="ltr">For full effect, you also want to enable the nir_shrink_vec_array_vars and nir_split_array_vars passes. </div> <div class="gmail_quote"><div dir="ltr">On Wed, Oct 17, 2018 at 6:00 PM Timothy Arceri &lt;<a href="mailto:tarceri@itsqueeze.com">tarceri@itsqueeze.com</a>&gt; wrote: </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Totals from affected shaders: SGPRS: 1112 -> 1112 (0.00 %) VGPRS: 1492 -> 1196 (-19.84 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 112172 -> 101316 (-9.68 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 93 -> 98 (5.38 %) Wait states: 0 -> 0 (0.00 %) All affected shaders are from "Batman: Arkham City" over DXVK. The pass detects that the temporary array created by DXVK for storing TCS inputs is a copy of the input arrays and allows us to avoid copying all of the input data and then indirecting on it with if-ladders; instead, we just do indirect indexing. 
--- src/amd/vulkan/radv_pipeline.c | 6 +++--- src/amd/vulkan/radv_shader.c | 22 ++++++++++++++++++---- src/amd/vulkan/radv_shader.h | 3 ++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e1d665d0ac7..8d15a048bbf 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1808,13 +1808,13 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders) ac_lower_indirect_derefs(ordered_shaders[i], pipeline->device->physical_device->rad_info.chip_class); } - radv_optimize_nir(ordered_shaders[i], false); + radv_optimize_nir(ordered_shaders[i], false, false); if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) { ac_lower_indirect_derefs(ordered_shaders[i - 1], pipeline->device->physical_device->rad_info.chip_class); } - radv_optimize_nir(ordered_shaders[i - 1], false); + radv_optimize_nir(ordered_shaders[i - 1], false, false); } } } @@ -2073,7 +2073,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) { nir_lower_io_to_scalar_early(nir[i], mask); - radv_optimize_nir(nir[i], false); + radv_optimize_nir(nir[i], false, false); } } } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 3b3422c8da6..52aa83d4a5a 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -118,7 +118,8 @@ void radv_DestroyShaderModule( } void -radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, + bool allow_copies) { bool progress; @@ -128,6 +129,15 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively) NIR_PASS_V(shader, nir_lower_vars_to_ssa); NIR_PASS_V(shader, nir_lower_pack); + if (allow_copies) { + /* Only run this pass in the first call to + * radv_optimize_nir. 
Later calls assume that we've + * lowered away any copy_deref instructions and we + * don't want to introduce any more. + */ + NIR_PASS(progress, shader, nir_opt_find_array_copies); + } + NIR_PASS(progress, shader, nir_opt_copy_prop_vars); NIR_PASS(progress, shader, nir_opt_dead_write_vars); @@ -306,7 +316,6 @@ radv_shader_compile_to_nir(struct radv_device *device, } nir_split_var_copies(nir); - nir_lower_var_copies(nir); nir_lower_global_vars_to_local(nir); nir_remove_dead_variables(nir, nir_var_local); @@ -323,7 +332,12 @@ radv_shader_compile_to_nir(struct radv_device *device, nir_lower_load_const_to_scalar(nir); if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) - radv_optimize_nir(nir, false); + radv_optimize_nir(nir, false, true); + + /* We call nir_lower_var_copies() after the first radv_optimize_nir() + * to remove any copies introduced by nir_opt_find_array_copies(). + */ + nir_lower_var_copies(nir); /* Indirect lowering must be called after the radv_optimize_nir() loop * has been called at least once. Otherwise indirect lowering can @@ -331,7 +345,7 @@ radv_shader_compile_to_nir(struct radv_device *device, * considered too large for unrolling. 
*/ ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class); - radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT); + radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false); return nir; } diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index c490b69f52b..22423e5f99a 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -298,7 +298,8 @@ struct radv_shader_slab { }; void -radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively); +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, + bool allow_copies); nir_shader * radv_shader_compile_to_nir(struct radv_device *device, -- 2.17.1 _______________________________________________ mesa-dev mailing list <a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a> <a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a> </blockquote></div>