[Mesa-dev] [PATCH 11/20] anv/pipeline: Unify 3DSTATE_PS emission

Mon Nov 14 10:35:42 UTC 2016

On Sat, 2016-11-12 at 13:34 -0800, Jason Ekstrand wrote:

In this patch we no longer do:

  ps.RenderTargetResolveEnable = false;

It would be nice to be consistent and either initialise everything or
just drop all the lines setting things to false/0 and let the memset do
its job.

> ---
>  src/intel/vulkan/gen7_pipeline.c      | 57 +----------------------
> -----
>  src/intel/vulkan/gen8_pipeline.c      | 38 +------------------
>  src/intel/vulkan/genX_pipeline_util.h | 70
> +++++++++++++++++++++++++++++++++++
>  3 files changed, 72 insertions(+), 93 deletions(-)
> 
> diff --git a/src/intel/vulkan/gen7_pipeline.c
> b/src/intel/vulkan/gen7_pipeline.c
> index 40e1d81..dbec828 100644
> --- a/src/intel/vulkan/gen7_pipeline.c
> +++ b/src/intel/vulkan/gen7_pipeline.c
> @@ -107,6 +107,7 @@ genX(graphics_pipeline_create)(
>     emit_3dstate_vs(pipeline);
>     emit_3dstate_gs(pipeline);
>     emit_3dstate_sbe(pipeline);
> +   emit_3dstate_ps(pipeline);
>  
>     if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
>        anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
> @@ -117,16 +118,7 @@ genX(graphics_pipeline_create)(
>           wm.EarlyDepthStencilControl            = EDSC_NORMAL;
>           wm.PointRasterizationRule              =
> RASTRULE_UPPER_RIGHT;
>        }
> -
> -      /* Even if no fragments are ever dispatched, the hardware
> hangs if we
> -       * don't at least set the maximum number of threads.
> -       */
> -      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
> -         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
> -      }
>     } else {
> -      const struct anv_shader_bin *fs_bin =
> -         pipeline->shaders[MESA_SHADER_FRAGMENT];
>        const struct brw_wm_prog_data *wm_prog_data =
> get_wm_prog_data(pipeline);
>  
>        if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 ||
> @@ -135,53 +127,6 @@ genX(graphics_pipeline_create)(
>        if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1)
>           anv_finishme("primitive_id needs sbe swizzling setup");
>  
> -      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
> -         ps.KernelStartPointer0           = fs_bin->kernel.offset;
> -         ps.KernelStartPointer1           = 0;
> -         ps.KernelStartPointer2           = fs_bin->kernel.offset +
> -                                            wm_prog_data-
> >prog_offset_2;
> -
> -         ps.ScratchSpaceBasePointer = (struct anv_address) {
> -            .bo = anv_scratch_pool_alloc(device, &device-
> >scratch_pool,
> -                                         MESA_SHADER_FRAGMENT,
> -                                         wm_prog_data-
> >base.total_scratch),
> -            .offset = 0,
> -         };
> -         ps.PerThreadScratchSpace         =
> scratch_space(&wm_prog_data->base);
> -
> -         ps.SamplerCount                  =
> get_sampler_count(fs_bin);
> -         ps.BindingTableEntryCount        =
> get_binding_table_entry_count(fs_bin);
> -
> -         ps.MaximumNumberofThreads        = devinfo->max_wm_threads
> - 1;
> -         ps.PushConstantEnable            = wm_prog_data-
> >base.nr_params > 0;
> -         ps.AttributeEnable               = wm_prog_data-
> >num_varying_inputs > 0;
> -         ps.oMaskPresenttoRenderTarget    = wm_prog_data-
> >uses_omask;
> -
> -         ps.RenderTargetFastClearEnable   = false;
> -         ps.DualSourceBlendEnable         = false;
> -         ps.RenderTargetResolveEnable     = false;
> -
> -         ps.PositionXYOffsetSelect        = wm_prog_data-
> >uses_pos_offset ?
> -                                            POSOFFSET_SAMPLE :
> POSOFFSET_NONE;
> -
> -         ps._32PixelDispatchEnable        = false;
> -         ps._16PixelDispatchEnable        = wm_prog_data-
> >dispatch_16;
> -         ps._8PixelDispatchEnable         = wm_prog_data-
> >dispatch_8;
> -
> -         ps.DispatchGRFStartRegisterForConstantSetupData0 =
> -            wm_prog_data->base.dispatch_grf_start_reg,
> -         ps.DispatchGRFStartRegisterForConstantSetupData1 = 0,
> -         ps.DispatchGRFStartRegisterForConstantSetupData2 =
> -            wm_prog_data->dispatch_grf_start_reg_2;
> -
> -         /* Haswell requires the sample mask to be set in this
> packet as well as
> -          * in 3DSTATE_SAMPLE_MASK; the values should match. */
> -         /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
> -#if GEN_IS_HASWELL
> -         ps.SampleMask                    = 0xff;
> -#endif
> -      }
> -
>        uint32_t samples = pCreateInfo->pMultisampleState ?
>                           pCreateInfo->pMultisampleState-
> >rasterizationSamples : 1;
>  
> diff --git a/src/intel/vulkan/gen8_pipeline.c
> b/src/intel/vulkan/gen8_pipeline.c
> index f2499dc..56eb032 100644
> --- a/src/intel/vulkan/gen8_pipeline.c
> +++ b/src/intel/vulkan/gen8_pipeline.c
> @@ -112,49 +112,13 @@ genX(graphics_pipeline_create)(
>     emit_3dstate_gs(pipeline);
>     emit_3dstate_vs(pipeline);
>     emit_3dstate_sbe(pipeline);
> +   emit_3dstate_ps(pipeline);
>  
> -   const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
>     if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
> -      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps);
>        anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
> extra) {
>           extra.PixelShaderValid = false;
>        }
>     } else {
> -      const struct anv_shader_bin *fs_bin =
> -         pipeline->shaders[MESA_SHADER_FRAGMENT];
> -
> -      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
> -         ps.KernelStartPointer0     = fs_bin->kernel.offset;
> -         ps.KernelStartPointer1     = 0;
> -         ps.KernelStartPointer2     = fs_bin->kernel.offset +
> -                                      wm_prog_data->prog_offset_2;
> -         ps._8PixelDispatchEnable   = wm_prog_data->dispatch_8;
> -         ps._16PixelDispatchEnable  = wm_prog_data->dispatch_16;
> -         ps._32PixelDispatchEnable  = false;
> -         ps.SingleProgramFlow       = false;
> -         ps.VectorMaskEnable        = true;
> -         ps.SamplerCount            = get_sampler_count(fs_bin);
> -         ps.BindingTableEntryCount  =
> get_binding_table_entry_count(fs_bin);
> -         ps.PushConstantEnable      = wm_prog_data->base.nr_params >
> 0;
> -         ps.PositionXYOffsetSelect  = wm_prog_data->uses_pos_offset
> ?
> -            POSOFFSET_SAMPLE: POSOFFSET_NONE;
> -
> -         ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
> -
> -         ps.ScratchSpaceBasePointer = (struct anv_address) {
> -            .bo = anv_scratch_pool_alloc(device, &device-
> >scratch_pool,
> -                                         MESA_SHADER_FRAGMENT,
> -                                         wm_prog_data-
> >base.total_scratch),
> -            .offset = 0,
> -         };
> -         ps.PerThreadScratchSpace   = scratch_space(&wm_prog_data-
> >base);
> -
> -         ps.DispatchGRFStartRegisterForConstantSetupData0 =
> -            wm_prog_data->base.dispatch_grf_start_reg;
> -         ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
> -         ps.DispatchGRFStartRegisterForConstantSetupData2 =
> -            wm_prog_data->dispatch_grf_start_reg_2;
> -      }
>  
>        anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
>           ps.PixelShaderValid              = true;
> diff --git a/src/intel/vulkan/genX_pipeline_util.h
> b/src/intel/vulkan/genX_pipeline_util.h
> index 3906529..1215e9b 100644
> --- a/src/intel/vulkan/genX_pipeline_util.h
> +++ b/src/intel/vulkan/genX_pipeline_util.h
> @@ -1134,4 +1134,74 @@ emit_3dstate_gs(struct anv_pipeline *pipeline)
>     }
>  }
>  
> +static void
> +emit_3dstate_ps(struct anv_pipeline *pipeline)
> +{
> +   MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline-
> >device->info;
> +   const struct anv_shader_bin *fs_bin =
> +      pipeline->shaders[MESA_SHADER_FRAGMENT];
> +
> +   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
> +      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
> +#if GEN_GEN == 7
> +         /* Even if no fragments are ever dispatched, gen7 hardware
> hangs if
> +          * we don't at least set the maximum number of threads.
> +          */
> +         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
> +#endif
> +      }
> +      return;
> +   }
> +
> +   const struct brw_wm_prog_data *wm_prog_data =
> get_wm_prog_data(pipeline);
> +
> +   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
> +      ps.KernelStartPointer0     = fs_bin->kernel.offset;
> +      ps.KernelStartPointer1     = 0;
> +      ps.KernelStartPointer2     = fs_bin->kernel.offset +
> +                                   wm_prog_data->prog_offset_2;
> +      ps._8PixelDispatchEnable   = wm_prog_data->dispatch_8;
> +      ps._16PixelDispatchEnable  = wm_prog_data->dispatch_16;
> +      ps._32PixelDispatchEnable  = false;
> +
> +      ps.SingleProgramFlow       = false;
> +      ps.VectorMaskEnable        = true;
> +      ps.SamplerCount            = get_sampler_count(fs_bin);
> +      ps.BindingTableEntryCount  =
> get_binding_table_entry_count(fs_bin);
> +      ps.PushConstantEnable      = wm_prog_data->base.nr_params > 0;
> +      ps.PositionXYOffsetSelect  = wm_prog_data->uses_pos_offset ?
> +                                   POSOFFSET_SAMPLE: POSOFFSET_NONE;
> +#if GEN_GEN < 8
> +      ps.AttributeEnable         = wm_prog_data->num_varying_inputs
> > 0;
> +      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;

Again, if we are going to bother trying to align the = column, now is the
time to get this right, while we are already moving everything.

> +      ps.DualSourceBlendEnable   = wm_prog_data->dual_src_blend;

This was previously false for gen7. If this is a fix, maybe mention it
in the commit message or even split it into a separate patch?

> +#endif
> +
> +#if GEN_IS_HASWELL
> +      /* Haswell requires the sample mask to be set in this packet
> as well
> +       * as in 3DSTATE_SAMPLE_MASK; the values should match.
> +       */
> +      ps.SampleMask                    = 0xff;
> +#endif
> +
> +#if GEN_GEN >= 9
> +      ps.MaximumNumberofThreadsPerPSD  = 64 - 1;
> +#elif GEN_GEN >= 8

This would make more sense as GEN_GEN == 8

> +      ps.MaximumNumberofThreadsPerPSD  = 64 - 2;
> +#else
> +      ps.MaximumNumberofThreads        = devinfo->max_wm_threads -
> 1;
> +#endif
> +
> +      ps.DispatchGRFStartRegisterForConstantSetupData0 =
> +         wm_prog_data->base.dispatch_grf_start_reg;
> +      ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
> +      ps.DispatchGRFStartRegisterForConstantSetupData2 =
> +         wm_prog_data->dispatch_grf_start_reg_2;
> +
> +      ps.PerThreadScratchSpace   = get_scratch_space(fs_bin);
> +      ps.ScratchSpaceBasePointer =
> +         get_scratch_address(pipeline, MESA_SHADER_FRAGMENT,
> fs_bin);
> +   }
> +}
> +
>  #endif /* GENX_PIPELINE_UTIL_H */