<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Fri, May 27, 2016 at 1:01 PM, Jordan Justen <span dir="ltr"><<a href="mailto:jordan.l.justen@intel.com" target="_blank">jordan.l.justen@intel.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On 2016-05-20 12:41:04, Jason Ekstrand wrote:<br>
> Instead of blasting it out as part of the pipeline, we put it in the<br>
> command buffer and only blast it out when it's really needed.  Since the<br>
> PUSH_CONSTANT_ALLOC commands aren't pipelined, they immediately cause a<br>
> stall which we would like to avoid.<br>
> ---<br>
>  src/intel/vulkan/anv_cmd_buffer.c     |  1 +<br>
>  src/intel/vulkan/anv_pipeline.c       | 22 ----------<br>
>  src/intel/vulkan/anv_private.h        |  2 +-<br>
>  src/intel/vulkan/genX_cmd_buffer.c    | 78 +++++++++++++++++++++++++++++++----<br>
>  src/intel/vulkan/genX_pipeline_util.h | 12 ------<br>
>  5 files changed, 71 insertions(+), 44 deletions(-)<br>
><br>
> diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c<br>
> index bba24e8..db7793e 100644<br>
> --- a/src/intel/vulkan/anv_cmd_buffer.c<br>
> +++ b/src/intel/vulkan/anv_cmd_buffer.c<br>
> @@ -130,6 +130,7 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)<br>
>     state->descriptors_dirty = 0;<br>
>     state->push_constants_dirty = 0;<br>
>     state->pipeline = NULL;<br>
> +   state->push_constant_stages = 0;<br>
>     state->restart_index = UINT32_MAX;<br>
>     state->dynamic = default_dynamic_state;<br>
>     state->need_query_wa = true;<br>
> diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c<br>
> index faa0adb..c010f0f 100644<br>
> --- a/src/intel/vulkan/anv_pipeline.c<br>
> +++ b/src/intel/vulkan/anv_pipeline.c<br>
> @@ -939,28 +939,6 @@ anv_compute_urb_partition(struct anv_pipeline *pipeline)<br>
>     pipeline->urb.start[MESA_SHADER_TESS_EVAL] = push_constant_chunks;<br>
>     pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1;<br>
>     pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0;<br>
> -<br>
> -   const unsigned stages =<br>
> -      _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS);<br>
> -   unsigned size_per_stage = stages ? (push_constant_kb / stages) : 0;<br>
> -   unsigned used_kb = 0;<br>
> -<br>
> -   /* Broadwell+ and Haswell gt3 require that the push constant sizes be in<br>
> -    * units of 2KB.  Incidentally, these are the same platforms that have<br>
> -    * 32KB worth of push constant space.<br>
> -    */<br>
> -   if (push_constant_kb == 32)<br>
> -      size_per_stage &= ~1u;<br>
> -<br>
> -   for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {<br>
> -      pipeline->urb.push_size[i] =<br>
> -         (pipeline->active_stages & (1 << i)) ? size_per_stage : 0;<br>
> -      used_kb += pipeline->urb.push_size[i];<br>
> -      assert(used_kb <= push_constant_kb);<br>
> -   }<br>
> -<br>
> -   pipeline->urb.push_size[MESA_SHADER_FRAGMENT] =<br>
> -      push_constant_kb - used_kb;<br>
>  }<br>
><br>
>  static void<br>
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h<br>
> index 33cbff9..80e768d 100644<br>
> --- a/src/intel/vulkan/anv_private.h<br>
> +++ b/src/intel/vulkan/anv_private.h<br>
> @@ -1177,6 +1177,7 @@ struct anv_cmd_state {<br>
>     uint32_t                                     restart_index;<br>
>     struct anv_vertex_binding                    vertex_bindings[MAX_VBS];<br>
>     struct anv_descriptor_set *                  descriptors[MAX_SETS];<br>
> +   VkShaderStageFlags                           push_constant_stages;<br>
>     struct anv_push_constants *                  push_constants[MESA_SHADER_STAGES];<br>
>     struct anv_state                             binding_tables[MESA_SHADER_STAGES];<br>
>     struct anv_state                             samplers[MESA_SHADER_STAGES];<br>
> @@ -1411,7 +1412,6 @@ struct anv_pipeline {<br>
>     uint32_t                                     scratch_start[MESA_SHADER_STAGES];<br>
>     uint32_t                                     total_scratch;<br>
>     struct {<br>
> -      uint8_t                                   push_size[MESA_SHADER_FRAGMENT + 1];<br>
>        uint32_t                                  start[MESA_SHADER_GEOMETRY + 1];<br>
>        uint32_t                                  size[MESA_SHADER_GEOMETRY + 1];<br>
>        uint32_t                                  entries[MESA_SHADER_GEOMETRY + 1];<br>
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c<br>
> index ee47c29..f5e3530 100644<br>
> --- a/src/intel/vulkan/genX_cmd_buffer.c<br>
> +++ b/src/intel/vulkan/genX_cmd_buffer.c<br>
> @@ -274,6 +274,72 @@ void genX(CmdPipelineBarrier)(<br>
>     }<br>
>  }<br>
><br>
> +static void<br>
> +cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)<br>
> +{<br>
> +   VkShaderStageFlags stages = cmd_buffer->state.pipeline->active_stages;<br>
> +<br>
> +   /* In order to avoid thrash, we assume that vertex and fragment stages<br>
> +    * always exist.  In the rare case where one is missing *and* the other<br>
> +    * uses push concstants, this may be suboptimal.  However, avoiding stalls<br>
> +    * seems more important.<br>
> +    */<br>
> +   stages |= VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT;<br>
> +<br>
> +   if (stages == cmd_buffer->state.push_constant_stages)<br>
> +      return;<br>
> +<br>
> +#if GEN_GEN >= 8<br>
> +   const unsigned push_constant_kb = 32;<br>
> +#elif GEN_IS_HASWELL<br>
> +   const unsigned push_constant_kb = cmd_buffer->device-><a href="http://info.gt" rel="noreferrer" target="_blank">info.gt</a> == 3 ? 32 : 16;<br>
> +#else<br>
> +   const unsigned push_constant_kb = 16;<br>
> +#endif<br>
<br>
</div></div>I noted that this changed from the old code that just used 32k. I see<br>
that this matches src/mesa/drivers/dri/i965/gen7_urb.c. Maybe worth<br>
noting in the commit message?<br></blockquote><div><br></div><div>I think we just use 32KB at one point, but I fixed that a while ago.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Reviewed-by: Jordan Justen <<a href="mailto:jordan.l.justen@intel.com">jordan.l.justen@intel.com</a>><br></blockquote><div><br></div><div>Thanks<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div><div class="h5"><br>
> +<br>
> +   const unsigned num_stages =<br>
> +      _mesa_bitcount(stages & VK_SHADER_STAGE_ALL_GRAPHICS);<br>
> +   unsigned size_per_stage = push_constant_kb / num_stages;<br>
> +<br>
> +   /* Broadwell+ and Haswell gt3 require that the push constant sizes be in<br>
> +    * units of 2KB.  Incidentally, these are the same platforms that have<br>
> +    * 32KB worth of push constant space.<br>
> +    */<br>
> +   if (push_constant_kb == 32)<br>
> +      size_per_stage &= ~1u;<br>
> +<br>
> +   uint32_t kb_used = 0;<br>
> +   for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {<br>
> +      unsigned push_size = (stages & (1 << i)) ? size_per_stage : 0;<br>
> +      anv_batch_emit(&cmd_buffer->batch,<br>
> +                     GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {<br>
> +         alloc._3DCommandSubOpcode  = 18 + i;<br>
> +         alloc.ConstantBufferOffset = (push_size > 0) ? kb_used : 0;<br>
> +         alloc.ConstantBufferSize   = push_size;<br>
> +      }<br>
> +      kb_used += push_size;<br>
> +   }<br>
> +<br>
> +   anv_batch_emit(&cmd_buffer->batch,<br>
> +                  GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), alloc) {<br>
> +      alloc.ConstantBufferOffset = kb_used;<br>
> +      alloc.ConstantBufferSize = push_constant_kb - kb_used;<br>
> +   }<br>
> +<br>
> +   cmd_buffer->state.push_constant_stages = stages;<br>
> +<br>
> +   /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS:<br>
> +    *<br>
> +    *    "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to<br>
> +    *    the next 3DPRIMITIVE command after programming the<br>
> +    *    3DSTATE_PUSH_CONSTANT_ALLOC_VS"<br>
> +    *<br>
> +    * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of<br>
> +    * pipeline setup, we need to dirty push constants.<br>
> +    */<br>
> +   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;<br>
> +}<br>
> +<br>
>  static uint32_t<br>
>  cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)<br>
>  {<br>
> @@ -384,16 +450,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)<br>
><br>
>        anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);<br>
><br>
> -      /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS:<br>
> -       *<br>
> -       *    "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to<br>
> -       *    the next 3DPRIMITIVE command after programming the<br>
> -       *    3DSTATE_PUSH_CONSTANT_ALLOC_VS"<br>
> -       *<br>
> -       * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of<br>
> -       * pipeline setup, we need to dirty push constants.<br>
> +      /* If the pipeline changed, we may need to re-allocate push constant<br>
> +       * space in the URB.<br>
>         */<br>
> -      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;<br>
> +      cmd_buffer_alloc_push_constants(cmd_buffer);<br>
>     }<br>
><br>
>  #if GEN_GEN <= 7<br>
> diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h<br>
> index ecbe436..2ed55e0 100644<br>
> --- a/src/intel/vulkan/genX_pipeline_util.h<br>
> +++ b/src/intel/vulkan/genX_pipeline_util.h<br>
> @@ -205,18 +205,6 @@ emit_urb_setup(struct anv_pipeline *pipeline)<br>
>     }<br>
>  #endif<br>
><br>
> -   unsigned push_start = 0;<br>
> -   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {<br>
> -      unsigned push_size = pipeline->urb.push_size[i];<br>
> -      anv_batch_emit(&pipeline->batch,<br>
> -                     GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {<br>
> -         alloc._3DCommandSubOpcode  = 18 + i;<br>
> -         alloc.ConstantBufferOffset = (push_size > 0) ? push_start : 0;<br>
> -         alloc.ConstantBufferSize   = push_size;<br>
> -      }<br>
> -      push_start += pipeline->urb.push_size[i];<br>
> -   }<br>
> -<br>
>     for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {<br>
>        anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), urb) {<br>
>           urb._3DCommandSubOpcode       = 48 + i;<br>
> --<br>
> 2.5.0.400.gff86faf<br>
><br>
</div></div>> _______________________________________________<br>
> mesa-dev mailing list<br>
> <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
> <a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div><br></div></div>