[Mesa-dev] [PATCH 08/20] anv/pipeline: Unify 3DSTATE_GS emission

Sat Nov 12 22:54:44 UTC 2016

On Sat, 2016-11-12 at 13:34 -0800, Jason Ekstrand wrote:

I have two questions and two suggestions below. 

With the suggestions addressed and assuming the answer to both
questions is yes, Patch 7-8 are:

Reviewed-by: Timothy Arceri <timothy.arceri at collabora.com>

> ---
>  src/intel/vulkan/gen7_pipeline.c      | 48 +----------------------
>  src/intel/vulkan/gen8_pipeline.c      | 62 +----------------------
> ------
>  src/intel/vulkan/genX_pipeline_util.h | 73
> +++++++++++++++++++++++++++++++++++
>  3 files changed, 75 insertions(+), 108 deletions(-)
> 
> diff --git a/src/intel/vulkan/gen7_pipeline.c
> b/src/intel/vulkan/gen7_pipeline.c
> index e604c25..52577f5 100644
> --- a/src/intel/vulkan/gen7_pipeline.c
> +++ b/src/intel/vulkan/gen7_pipeline.c
> @@ -105,53 +105,7 @@ genX(graphics_pipeline_create)(
>  #endif
>  
>     emit_3dstate_vs(pipeline);
> -
> -   const struct brw_gs_prog_data *gs_prog_data =
> get_gs_prog_data(pipeline);
> -
> -   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
> -      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
> -   } else {
> -      const struct anv_shader_bin *gs_bin =
> -         pipeline->shaders[MESA_SHADER_GEOMETRY];
> -
> -      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
> -         gs.KernelStartPointer         = gs_bin->kernel.offset;
> -
> -         gs.ScratchSpaceBasePointer = (struct anv_address) {
> -            .bo = anv_scratch_pool_alloc(device, &device-
> >scratch_pool,
> -                                         MESA_SHADER_GEOMETRY,
> -                                         gs_prog_data-
> >base.base.total_scratch),
> -            .offset = 0,
> -         };
> -         gs.PerThreadScratchSpace      =
> scratch_space(&gs_prog_data->base.base);
> -
> -         gs.OutputVertexSize           = gs_prog_data-
> >output_vertex_size_hwords * 2 - 1;
> -         gs.OutputTopology             = gs_prog_data-
> >output_topology;
> -         gs.VertexURBEntryReadLength   = gs_prog_data-
> >base.urb_read_length;
> -         gs.IncludeVertexHandles       = gs_prog_data-
> >base.include_vue_handles;
> -
> -         gs.DispatchGRFStartRegisterForURBData =
> -            gs_prog_data->base.base.dispatch_grf_start_reg;
> -
> -         gs.SamplerCount              = get_sampler_count(gs_bin);
> -         gs.BindingTableEntryCount    =
> get_binding_table_entry_count(gs_bin);
> -
> -         gs.MaximumNumberofThreads     = devinfo->max_gs_threads -
> 1;
> -         /* This in the next dword on HSW. */
> -         gs.ControlDataFormat          = gs_prog_data-
> >control_data_format;
> -         gs.ControlDataHeaderSize      = gs_prog_data-
> >control_data_header_size_hwords;
> -         gs.InstanceControl            = MAX2(gs_prog_data-
> >invocations, 1) - 1;
> -         gs.DispatchMode               = gs_prog_data-
> >base.dispatch_mode;
> -         gs.StatisticsEnable           = true;
> -         gs.IncludePrimitiveID         = gs_prog_data-
> >include_primitive_id;
> -#     if (GEN_IS_HASWELL)
> -         gs.ReorderMode                = REORDER_TRAILING;

Shouldn't we have changed REORDER_TRAILING to TRAILING in
src/intel/genxml/gen75.xml in the previous patch?

> -#     else
> -         gs.ReorderEnable              = true;
> -#     endif
> -         gs.FunctionEnable             = true;
> -      }
> -   }
> +   emit_3dstate_gs(pipeline);
>  
>     if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
>        anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);
> diff --git a/src/intel/vulkan/gen8_pipeline.c
> b/src/intel/vulkan/gen8_pipeline.c
> index 1320a13..5816bd4 100644
> --- a/src/intel/vulkan/gen8_pipeline.c
> +++ b/src/intel/vulkan/gen8_pipeline.c
> @@ -53,9 +53,6 @@ genX(graphics_pipeline_create)(
>  {
>     ANV_FROM_HANDLE(anv_device, device, _device);
>     ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
> -   const struct anv_physical_device *physical_device =
> -      &device->instance->physicalDevice;
> -   const struct gen_device_info *devinfo = &physical_device->info;
>     struct anv_subpass *subpass = &pass->subpasses[pCreateInfo-
> >subpass];
>     struct anv_pipeline *pipeline;
>     VkResult result;
> @@ -112,64 +109,7 @@ genX(graphics_pipeline_create)(
>           wm_prog_data ? wm_prog_data->barycentric_interp_modes : 0;
>     }
>  
> -   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
> -      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
> -   } else {
> -      const struct brw_gs_prog_data *gs_prog_data =
> get_gs_prog_data(pipeline);
> -      const struct anv_shader_bin *gs_bin =
> -         pipeline->shaders[MESA_SHADER_GEOMETRY];
> -
> -      uint32_t offset = 1;
> -      uint32_t length = (gs_prog_data->base.vue_map.num_slots + 1) /
> 2 - offset;
> -
> -      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
> -         gs.SingleProgramFlow       = false;
> -         gs.KernelStartPointer      = gs_bin->kernel.offset;
> -         gs.VectorMaskEnable        = false;
> -         gs.SamplerCount            = get_sampler_count(gs_bin);
> -         gs.BindingTableEntryCount  =
> get_binding_table_entry_count(gs_bin);
> -         gs.ExpectedVertexCount     = gs_prog_data->vertices_in;
> -
> -         gs.ScratchSpaceBasePointer = (struct anv_address) {
> -            .bo = anv_scratch_pool_alloc(device, &device-
> >scratch_pool,
> -                                         MESA_SHADER_GEOMETRY,
> -                                         gs_prog_data-
> >base.base.total_scratch),
> -            .offset = 0,
> -         };
> -         gs.PerThreadScratchSpace   = scratch_space(&gs_prog_data-
> >base.base);
> -         gs.OutputVertexSize        = gs_prog_data-
> >output_vertex_size_hwords * 2 - 1;
> -         gs.OutputTopology          = gs_prog_data->output_topology;
> -         gs.VertexURBEntryReadLength = gs_prog_data-
> >base.urb_read_length;
> -         gs.IncludeVertexHandles    = gs_prog_data-
> >base.include_vue_handles;
> -
> -         gs.DispatchGRFStartRegisterForURBData =
> -            gs_prog_data->base.base.dispatch_grf_start_reg;
> -
> -         gs.MaximumNumberofThreads  = devinfo->max_gs_threads / 2 -
> 1;
> -         gs.ControlDataHeaderSize   = gs_prog_data-
> >control_data_header_size_hwords;
> -         gs.DispatchMode            = gs_prog_data-
> >base.dispatch_mode;
> -         gs.StatisticsEnable        = true;
> -         gs.IncludePrimitiveID      = gs_prog_data-
> >include_primitive_id;
> -         gs.ReorderMode             = TRAILING;
> -         gs.FunctionEnable          = true;
> -
> -         gs.ControlDataFormat       = gs_prog_data-
> >control_data_format;
> -
> -         gs.StaticOutput            = gs_prog_data-
> >static_vertex_count >= 0;
> -         gs.StaticOutputVertexCount =
> -            gs_prog_data->static_vertex_count >= 0 ?
> -            gs_prog_data->static_vertex_count : 0;
> -
> -         /* FIXME: mesa sets this based on ctx-
> >Transform.ClipPlanesEnabled:
> -          * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
> -          * UserClipDistanceCullTestEnableBitmask(v)
> -          */
> -
> -         gs.VertexURBEntryOutputReadOffset = offset;
> -         gs.VertexURBEntryOutputLength = length;
> -      }
> -   }
> -
> +   emit_3dstate_gs(pipeline);
>     emit_3dstate_vs(pipeline);
>  
>     const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
> diff --git a/src/intel/vulkan/genX_pipeline_util.h
> b/src/intel/vulkan/genX_pipeline_util.h
> index 4fa96c8..515cc9a 100644
> --- a/src/intel/vulkan/genX_pipeline_util.h
> +++ b/src/intel/vulkan/genX_pipeline_util.h
> @@ -1053,4 +1053,77 @@ emit_3dstate_vs(struct anv_pipeline *pipeline)
>     }
>  }
>  
> +static void
> +emit_3dstate_gs(struct anv_pipeline *pipeline)
> +{
> +   const struct gen_device_info *devinfo = &pipeline->device->info;
> +   const struct anv_shader_bin *gs_bin =
> +      pipeline->shaders[MESA_SHADER_GEOMETRY];
> +
> +   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
> +      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
> +      return;
> +   }
> +
> +   const struct brw_gs_prog_data *gs_prog_data =
> get_gs_prog_data(pipeline);
> +
> +   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
> +      gs.FunctionEnable          = true;
> +      gs.StatisticsEnable        = true;
> +      gs.KernelStartPointer      = gs_bin->kernel.offset;
> +      gs.DispatchMode            = gs_prog_data->base.dispatch_mode;
> +
> +      gs.SingleProgramFlow       = false;
> +      gs.VectorMaskEnable        = false;
> +      gs.SamplerCount            = get_sampler_count(gs_bin);
> +      gs.BindingTableEntryCount  =
> get_binding_table_entry_count(gs_bin);
> +      gs.IncludeVertexHandles    = gs_prog_data-
> >base.include_vue_handles;
> +      gs.IncludePrimitiveID      = gs_prog_data-
> >include_primitive_id;
> +
> +      if (GEN_GEN == 8) {

Just checking this doesn't need to be GEN_GEN >= 8 ??

> +         /* Broadwell is weird.  It needs us to divide by 2. */
> +         gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 -
> 1;
> +      } else {
> +         gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
> +      }
> +
> +      gs.OutputVertexSize        = gs_prog_data-
> >output_vertex_size_hwords * 2 - 1;
> +      gs.OutputTopology          = gs_prog_data->output_topology;
> +      gs.VertexURBEntryReadLength = gs_prog_data-
> >base.urb_read_length;
> +      gs.ControlDataFormat       = gs_prog_data-
> >control_data_format;
> +      gs.ControlDataHeaderSize   = gs_prog_data-
> >control_data_header_size_hwords;
> +      gs.InstanceControl         = MAX2(gs_prog_data->invocations,
> 1) - 1;

This wasn't previously set for gen8 I take it this doesn't matter?

> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
> +      gs.ReorderMode             = TRAILING;
> +#else
> +      gs.ReorderEnable           = true;
> +#endif

Maybe push this block over 1 more space so everything is aligned
together?

> +
> +#if GEN_GEN >= 8
> +      gs.ExpectedVertexCount     = gs_prog_data->vertices_in;
> +      gs.StaticOutput            = gs_prog_data->static_vertex_count 
> >= 0;
> +      gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count 
> >= 0 ?
> +                                   gs_prog_data->static_vertex_count 
> : 0;
> +#endif
> +
> +      gs.VertexURBEntryReadOffset = 0;
> +      gs.VertexURBEntryReadLength = gs_prog_data-
> >base.urb_read_length;
> +      gs.DispatchGRFStartRegisterForURBData =
> +         gs_prog_data->base.base.dispatch_grf_start_reg;
> +
> +#if GEN_GEN >= 8
> +      gs.VertexURBEntryOutputReadOffset = get_urb_output_offset();
> +      gs.VertexURBEntryOutputLength     =
> get_urb_output_length(gs_bin);
> +
> +     /* TODO */
> +      gs.UserClipDistanceClipTestEnableBitmask = 0;
> +      gs.UserClipDistanceCullTestEnableBitmask = 0;
> +#endif
> +
> +      gs.PerThreadScratchSpace   = get_scratch_space(gs_bin);
> +      gs.ScratchSpaceBasePointer =
> +         get_scratch_address(pipeline, MESA_SHADER_GEOMETRY,
> gs_bin);
> +   }
> +}
> +
>  #endif /* GENX_PIPELINE_UTIL_H */