<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Thu, Aug 16, 2018 at 10:11 PM Timothy Arceri <<a href="mailto:tarceri@itsqueeze.com">tarceri@itsqueeze.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Nice numbers :)<br>
<br>
I'm not sure I 100% follow the why for patch 1 but the logic seems <br>
correct for the way you want to change <br></blockquote><div><br></div><div>I just sort-of found that one along the way.  It'd be good if Lionel took a real quick look at it.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">it, and all other patches look <br>
good to me so series:<br>
<br>
Reviewed-by: Timothy Arceri <<a href="mailto:tarceri@itsqueeze.com" target="_blank">tarceri@itsqueeze.com</a>><br></blockquote><div><br></div><div>Thanks!<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
On 08/08/18 18:12, Jason Ekstrand wrote:<br>
> This allows us to use the link-optimized shader for determining binding<br>
> table layouts and, more importantly, URB layouts.  For apps running on<br>
> DXVK, this is extremely important as DXVK likes to declare max-size<br>
> inputs and outputs and this lets us massively shrink our URB space<br>
> requirements.<br>
> <br>
> VkPipeline-db results (Batman pipelines only) on KBL:<br>
> <br>
>      total instructions in shared programs: 820403 -> 790008 (-3.70%)<br>
>      instructions in affected programs: 273759 -> 243364 (-11.10%)<br>
>      helped: 622<br>
>      HURT: 42<br>
> <br>
>      total spills in shared programs: 8449 -> 5212 (-38.31%)<br>
>      spills in affected programs: 3427 -> 190 (-94.46%)<br>
>      helped: 607<br>
>      HURT: 2<br>
> <br>
>      total fills in shared programs: 11638 -> 6067 (-47.87%)<br>
>      fills in affected programs: 5879 -> 308 (-94.76%)<br>
>      helped: 606<br>
>      HURT: 3<br>
> <br>
> Looking at shaders by hand, it makes the URB between TCS and TES go from<br>
> containing 32 per-vertex varyings per tessellation shader pair to a more<br>
> reasonable 8-12.  For a 3-vertex patch, that's at least half the URB<br>
> space no matter how big the patch section is.<br>
> ---<br>
>   src/intel/vulkan/anv_pipeline.c | 58 ++++++++++++++++-----------------<br>
>   1 file changed, 28 insertions(+), 30 deletions(-)<br>
> <br>
> diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c<br>
> index d4d31a43213..7708486c442 100644<br>
> --- a/src/intel/vulkan/anv_pipeline.c<br>
> +++ b/src/intel/vulkan/anv_pipeline.c<br>
> @@ -472,24 +472,17 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline,<br>
>      _mesa_sha1_final(&ctx, sha1_out);<br>
>   }<br>
>   <br>
> -static nir_shader *<br>
> -anv_pipeline_compile(struct anv_pipeline *pipeline,<br>
> -                     void *mem_ctx,<br>
> -                     struct anv_pipeline_layout *layout,<br>
> -                     struct anv_pipeline_stage *stage,<br>
> -                     struct brw_stage_prog_data *prog_data,<br>
> -                     struct anv_pipeline_bind_map *map)<br>
> +static void<br>
> +anv_pipeline_lower_nir(struct anv_pipeline *pipeline,<br>
> +                       void *mem_ctx,<br>
> +                       struct anv_pipeline_stage *stage,<br>
> +                       struct anv_pipeline_layout *layout)<br>
>   {<br>
>      const struct brw_compiler *compiler =<br>
>         pipeline->device->instance->physicalDevice.compiler;<br>
>   <br>
> -   nir_shader *nir = anv_shader_compile_to_nir(pipeline, mem_ctx,<br>
> -                                               stage->module,<br>
> -                                               stage->entrypoint,<br>
> -                                               stage->stage,<br>
> -                                               stage->spec_info);<br>
> -   if (nir == NULL)<br>
> -      return NULL;<br>
> +   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;<br>
> +   nir_shader *nir = stage->nir;<br>
>   <br>
>      NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);<br>
>   <br>
> @@ -531,15 +524,17 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,<br>
>         pipeline->needs_data_cache = true;<br>
>   <br>
>      /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */<br>
> -   if (layout)<br>
> -      anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, map);<br>
> +   if (layout) {<br>
> +      anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,<br>
> +                                    &stage->bind_map);<br>
> +   }<br>
>   <br>
>      if (nir->info.stage != MESA_SHADER_COMPUTE)<br>
>         brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);<br>
>   <br>
>      assert(nir->num_uniforms == prog_data->nr_params * 4);<br>
>   <br>
> -   return nir;<br>
> +   stage->nir = nir;<br>
>   }<br>
>   <br>
>   static void<br>
> @@ -807,16 +802,12 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,<br>
>      stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;<br>
>   <br>
>      assert(num_rts <= max_rt);<br>
> -   assert(stage->bind_map.surface_count + num_rts <= 256);<br>
> -   memmove(stage->bind_map.surface_to_descriptor + num_rts,<br>
> -           stage->bind_map.surface_to_descriptor,<br>
> -           stage->bind_map.surface_count *<br>
> -           sizeof(*stage->bind_map.surface_to_descriptor));<br>
> +   assert(stage->bind_map.surface_count == 0);<br>
>      typed_memcpy(stage->bind_map.surface_to_descriptor,<br>
>                   rt_bindings, num_rts);<br>
>      stage->bind_map.surface_count += num_rts;<br>
>   <br>
> -   anv_fill_binding_table(&stage->prog_data.wm.base, num_rts);<br>
> +   anv_fill_binding_table(&stage->prog_data.wm.base, 0);<br>
>   }<br>
>   <br>
>   static const unsigned *<br>
> @@ -976,10 +967,11 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,<br>
>            .sampler_to_descriptor = stages[s].sampler_to_descriptor<br>
>         };<br>
>   <br>
> -      stages[s].nir = anv_pipeline_compile(pipeline, pipeline_ctx, layout,<br>
> -                                           &stages[s],<br>
> -                                           &stages[s].prog_data.base,<br>
> -                                           &stages[s].bind_map);<br>
> +      stages[s].nir = anv_shader_compile_to_nir(pipeline, pipeline_ctx,<br>
> +                                                stages[s].module,<br>
> +                                                stages[s].entrypoint,<br>
> +                                                stages[s].stage,<br>
> +                                                stages[s].spec_info);<br>
>         if (stages[s].nir == NULL)<br>
>            goto fail;<br>
>      }<br>
> @@ -1020,6 +1012,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,<br>
>   <br>
>         void *stage_ctx = ralloc_context(NULL);<br>
>   <br>
> +      anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout);<br>
> +<br>
>         const unsigned *code;<br>
>         switch (s) {<br>
>         case MESA_SHADER_VERTEX:<br>
> @@ -1139,14 +1133,18 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,<br>
>   <br>
>         void *mem_ctx = ralloc_context(NULL);<br>
>   <br>
> -      stage.nir = anv_pipeline_compile(pipeline, mem_ctx, layout, &stage,<br>
> -                                       &stage.prog_data.base,<br>
> -                                       &stage.bind_map);<br>
> +      stage.nir = anv_shader_compile_to_nir(pipeline, mem_ctx,<br>
> +                                            stage.module,<br>
> +                                            stage.entrypoint,<br>
> +                                            stage.stage,<br>
> +                                            stage.spec_info);<br>
>         if (stage.nir == NULL) {<br>
>            ralloc_free(mem_ctx);<br>
>            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);<br>
>         }<br>
>   <br>
> +      anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);<br>
> +<br>
>         NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,<br>
>                    &stage.prog_data.cs);<br>
>   <br>
> <br>
</blockquote></div></div>