<div dir="ltr">and split_struct_vars while you're at it<br></div><br><div class="gmail_quote"><div dir="ltr">On Wed, Oct 17, 2018 at 6:15 PM Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">For full effect, you want to also enable shrink_vec_array_vars and split_array_vars<br></div><br><div class="gmail_quote"><div dir="ltr">On Wed, Oct 17, 2018 at 6:00 PM Timothy Arceri &lt;<a href="mailto:tarceri@itsqueeze.com">tarceri@itsqueeze.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Totals from affected shaders:<br>
SGPRS: 1112 -> 1112 (0.00 %)<br>
VGPRS: 1492 -> 1196 (-19.84 %)<br>
Spilled SGPRs: 0 -> 0 (0.00 %)<br>
Spilled VGPRs: 0 -> 0 (0.00 %)<br>
Private memory VGPRs: 0 -> 0 (0.00 %)<br>
Scratch size: 0 -> 0 (0.00 %) dwords per thread<br>
Code Size: 112172 -> 101316 (-9.68 %) bytes<br>
LDS: 0 -> 0 (0.00 %) blocks<br>
Max Waves: 93 -> 98 (5.38 %)<br>
Wait states: 0 -> 0 (0.00 %)<br>
<br>
All affected shaders are from "Batman: Arkham City" over DXVK.<br>
<br>
The pass detects that the temporary array created by DXVK for<br>
storing TCS inputs is a copy of the input arrays and allows<br>
us to avoid copying all of the input data and then indirecting<br>
on it with if-ladders; instead we just do indirect indexing.<br>
---<br>
 src/amd/vulkan/radv_pipeline.c |  6 +++---<br>
 src/amd/vulkan/radv_shader.c   | 22 ++++++++++++++++++----<br>
 src/amd/vulkan/radv_shader.h   |  3 ++-<br>
 3 files changed, 23 insertions(+), 8 deletions(-)<br>
<br>
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c<br>
index e1d665d0ac7..8d15a048bbf 100644<br>
--- a/src/amd/vulkan/radv_pipeline.c<br>
+++ b/src/amd/vulkan/radv_pipeline.c<br>
@@ -1808,13 +1808,13 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)<br>
                                ac_lower_indirect_derefs(ordered_shaders[i],<br>
                                                         pipeline->device->physical_device->rad_info.chip_class);<br>
                        }<br>
-                       radv_optimize_nir(ordered_shaders[i], false);<br>
+                       radv_optimize_nir(ordered_shaders[i], false, false);<br>
<br>
                        if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {<br>
                                ac_lower_indirect_derefs(ordered_shaders[i - 1],<br>
                                                         pipeline->device->physical_device->rad_info.chip_class);<br>
                        }<br>
-                       radv_optimize_nir(ordered_shaders[i - 1], false);<br>
+                       radv_optimize_nir(ordered_shaders[i - 1], false, false);<br>
                }<br>
        }<br>
 }<br>
@@ -2073,7 +2073,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,<br>
<br>
                        if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) {<br>
                                nir_lower_io_to_scalar_early(nir[i], mask);<br>
-                               radv_optimize_nir(nir[i], false);<br>
+                               radv_optimize_nir(nir[i], false, false);<br>
                        }<br>
                }<br>
        }<br>
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c<br>
index 3b3422c8da6..52aa83d4a5a 100644<br>
--- a/src/amd/vulkan/radv_shader.c<br>
+++ b/src/amd/vulkan/radv_shader.c<br>
@@ -118,7 +118,8 @@ void radv_DestroyShaderModule(<br>
 }<br>
<br>
 void<br>
-radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)<br>
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,<br>
+                  bool allow_copies)<br>
 {<br>
         bool progress;<br>
<br>
@@ -128,6 +129,15 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)<br>
                 NIR_PASS_V(shader, nir_lower_vars_to_ssa);<br>
                NIR_PASS_V(shader, nir_lower_pack);<br>
<br>
+               if (allow_copies) {<br>
+                       /* Only run this pass in the first call to<br>
+                        * radv_optimize_nir.  Later calls assume that we've<br>
+                        * lowered away any copy_deref instructions and we<br>
+                        * don't want to introduce any more.<br>
+                        */<br>
+                       NIR_PASS(progress, shader, nir_opt_find_array_copies);<br>
+               }<br>
+<br>
                NIR_PASS(progress, shader, nir_opt_copy_prop_vars);<br>
                NIR_PASS(progress, shader, nir_opt_dead_write_vars);<br>
<br>
@@ -306,7 +316,6 @@ radv_shader_compile_to_nir(struct radv_device *device,<br>
        }<br>
<br>
        nir_split_var_copies(nir);<br>
-       nir_lower_var_copies(nir);<br>
<br>
        nir_lower_global_vars_to_local(nir);<br>
        nir_remove_dead_variables(nir, nir_var_local);<br>
@@ -323,7 +332,12 @@ radv_shader_compile_to_nir(struct radv_device *device,<br>
        nir_lower_load_const_to_scalar(nir);<br>
<br>
        if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))<br>
-               radv_optimize_nir(nir, false);<br>
+               radv_optimize_nir(nir, false, true);<br>
+<br>
+       /* We call nir_lower_var_copies() after the first radv_optimize_nir()<br>
+        * to remove any copies introduced by nir_opt_find_array_copies().<br>
+        */<br>
+       nir_lower_var_copies(nir);<br>
<br>
        /* Indirect lowering must be called after the radv_optimize_nir() loop<br>
         * has been called at least once. Otherwise indirect lowering can<br>
@@ -331,7 +345,7 @@ radv_shader_compile_to_nir(struct radv_device *device,<br>
         * considered too large for unrolling.<br>
         */<br>
        ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);<br>
-       radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);<br>
+       radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);<br>
<br>
        return nir;<br>
 }<br>
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h<br>
index c490b69f52b..22423e5f99a 100644<br>
--- a/src/amd/vulkan/radv_shader.h<br>
+++ b/src/amd/vulkan/radv_shader.h<br>
@@ -298,7 +298,8 @@ struct radv_shader_slab {<br>
 };<br>
<br>
 void<br>
-radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);<br>
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,<br>
+                 bool allow_copies);<br>
<br>
 nir_shader *<br>
 radv_shader_compile_to_nir(struct radv_device *device,<br>
-- <br>
2.17.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></blockquote></div>