[Mesa-dev] [PATCH] radv: use nir_opt_find_array_copies()
Jason Ekstrand
jason at jlekstrand.net
Wed Oct 17 23:15:17 UTC 2018
And enable split_struct_vars while you're at it.
On Wed, Oct 17, 2018 at 6:15 PM Jason Ekstrand <jason at jlekstrand.net> wrote:
> For full effect, you also want to enable shrink_vec_array_vars and
> split_array_vars.
>
> On Wed, Oct 17, 2018 at 6:00 PM Timothy Arceri <tarceri at itsqueeze.com>
> wrote:
>
>> Totals from affected shaders:
>> SGPRS: 1112 -> 1112 (0.00 %)
>> VGPRS: 1492 -> 1196 (-19.84 %)
>> Spilled SGPRs: 0 -> 0 (0.00 %)
>> Spilled VGPRs: 0 -> 0 (0.00 %)
>> Private memory VGPRs: 0 -> 0 (0.00 %)
>> Scratch size: 0 -> 0 (0.00 %) dwords per thread
>> Code Size: 112172 -> 101316 (-9.68 %) bytes
>> LDS: 0 -> 0 (0.00 %) blocks
>> Max Waves: 93 -> 98 (5.38 %)
>> Wait states: 0 -> 0 (0.00 %)
>>
>> All affected shaders are from "Batman: Arkham City" over DXVK.
>>
>> The pass detects that the temporary array created by DXVK for
>> storing TCS inputs is a copy of the input arrays. This lets us
>> avoid copying all of the input data and then indirecting on it
>> with if-ladders; instead we just index the inputs indirectly.
>> ---
>> src/amd/vulkan/radv_pipeline.c | 6 +++---
>> src/amd/vulkan/radv_shader.c | 22 ++++++++++++++++++----
>> src/amd/vulkan/radv_shader.h | 3 ++-
>> 3 files changed, 23 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_pipeline.c
>> b/src/amd/vulkan/radv_pipeline.c
>> index e1d665d0ac7..8d15a048bbf 100644
>> --- a/src/amd/vulkan/radv_pipeline.c
>> +++ b/src/amd/vulkan/radv_pipeline.c
>> @@ -1808,13 +1808,13 @@ radv_link_shaders(struct radv_pipeline *pipeline,
>> nir_shader **shaders)
>>
>> ac_lower_indirect_derefs(ordered_shaders[i],
>>
>> pipeline->device->physical_device->rad_info.chip_class);
>> }
>> - radv_optimize_nir(ordered_shaders[i], false);
>> + radv_optimize_nir(ordered_shaders[i], false,
>> false);
>>
>> if
>> (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
>>
>> ac_lower_indirect_derefs(ordered_shaders[i - 1],
>>
>> pipeline->device->physical_device->rad_info.chip_class);
>> }
>> - radv_optimize_nir(ordered_shaders[i - 1], false);
>> + radv_optimize_nir(ordered_shaders[i - 1], false,
>> false);
>> }
>> }
>> }
>> @@ -2073,7 +2073,7 @@ void radv_create_shaders(struct radv_pipeline
>> *pipeline,
>>
>> if (!(flags &
>> VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) {
>> nir_lower_io_to_scalar_early(nir[i],
>> mask);
>> - radv_optimize_nir(nir[i], false);
>> + radv_optimize_nir(nir[i], false, false);
>> }
>> }
>> }
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 3b3422c8da6..52aa83d4a5a 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -118,7 +118,8 @@ void radv_DestroyShaderModule(
>> }
>>
>> void
>> -radv_optimize_nir(struct nir_shader *shader, bool
>> optimize_conservatively)
>> +radv_optimize_nir(struct nir_shader *shader, bool
>> optimize_conservatively,
>> + bool allow_copies)
>> {
>> bool progress;
>>
>> @@ -128,6 +129,15 @@ radv_optimize_nir(struct nir_shader *shader, bool
>> optimize_conservatively)
>> NIR_PASS_V(shader, nir_lower_vars_to_ssa);
>> NIR_PASS_V(shader, nir_lower_pack);
>>
>> + if (allow_copies) {
>> + /* Only run this pass in the first call to
>> + * radv_optimize_nir. Later calls assume that
>> we've
>> + * lowered away any copy_deref instructions and we
>> + * don't want to introduce any more.
>> + */
>> + NIR_PASS(progress, shader,
>> nir_opt_find_array_copies);
>> + }
>> +
>> NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
>> NIR_PASS(progress, shader, nir_opt_dead_write_vars);
>>
>> @@ -306,7 +316,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
>> }
>>
>> nir_split_var_copies(nir);
>> - nir_lower_var_copies(nir);
>>
>> nir_lower_global_vars_to_local(nir);
>> nir_remove_dead_variables(nir, nir_var_local);
>> @@ -323,7 +332,12 @@ radv_shader_compile_to_nir(struct radv_device
>> *device,
>> nir_lower_load_const_to_scalar(nir);
>>
>> if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
>> - radv_optimize_nir(nir, false);
>> + radv_optimize_nir(nir, false, true);
>> +
>> + /* We call nir_lower_var_copies() after the first
>> radv_optimize_nir()
>> + * to remove any copies introduced by nir_opt_find_array_copies().
>> + */
>> + nir_lower_var_copies(nir);
>>
>> /* Indirect lowering must be called after the radv_optimize_nir()
>> loop
>> * has been called at least once. Otherwise indirect lowering can
>> @@ -331,7 +345,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
>> * considered too large for unrolling.
>> */
>> ac_lower_indirect_derefs(nir,
>> device->physical_device->rad_info.chip_class);
>> - radv_optimize_nir(nir, flags &
>> VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);
>> + radv_optimize_nir(nir, flags &
>> VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);
>>
>> return nir;
>> }
>> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
>> index c490b69f52b..22423e5f99a 100644
>> --- a/src/amd/vulkan/radv_shader.h
>> +++ b/src/amd/vulkan/radv_shader.h
>> @@ -298,7 +298,8 @@ struct radv_shader_slab {
>> };
>>
>> void
>> -radv_optimize_nir(struct nir_shader *shader, bool
>> optimize_conservatively);
>> +radv_optimize_nir(struct nir_shader *shader, bool
>> optimize_conservatively,
>> + bool allow_copies);
>>
>> nir_shader *
>> radv_shader_compile_to_nir(struct radv_device *device,
>> --
>> 2.17.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20181017/eb671691/attachment.html>
More information about the mesa-dev mailing list