[Mesa-dev] [PATCH 11/20] anv/pipeline: Unify 3DSTATE_PS emission

Sat Nov 12 21:34:53 UTC 2016

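Both gen7_pipeline.c and gen8_pipeline.c open-coded their own 3DSTATE_PS
packet.  Move that into a single emit_3dstate_ps() helper in
genX_pipeline_util.h, with the per-generation differences handled by
GEN_GEN / GEN_IS_HASWELL preprocessor checks.

Note for gen7: DualSourceBlendEnable is now taken from
wm_prog_data->dual_src_blend instead of being hard-coded to false, and
VectorMaskEnable is now set to true, as the gen8 code already did.

---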
 src/intel/vulkan/gen7_pipeline.c      | 57 +---------------------------
 src/intel/vulkan/gen8_pipeline.c      | 38 +------------------
 src/intel/vulkan/genX_pipeline_util.h | 70 +++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 93 deletions(-)

diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index 40e1d81..dbec828 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -107,6 +107,7 @@ genX(graphics_pipeline_create)(
    emit_3dstate_vs(pipeline);
    emit_3dstate_gs(pipeline);
    emit_3dstate_sbe(pipeline);
+   emit_3dstate_ps(pipeline);
 
    if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
@@ -117,16 +118,7 @@ genX(graphics_pipeline_create)(
          wm.EarlyDepthStencilControl            = EDSC_NORMAL;
          wm.PointRasterizationRule              = RASTRULE_UPPER_RIGHT;
       }
-
-      /* Even if no fragments are ever dispatched, the hardware hangs if we
-       * don't at least set the maximum number of threads.
-       */
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
-         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
-      }
    } else {
-      const struct anv_shader_bin *fs_bin =
-         pipeline->shaders[MESA_SHADER_FRAGMENT];
       const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
 
       if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 ||
@@ -135,53 +127,6 @@ genX(graphics_pipeline_create)(
       if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1)
          anv_finishme("primitive_id needs sbe swizzling setup");
 
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
-         ps.KernelStartPointer0           = fs_bin->kernel.offset;
-         ps.KernelStartPointer1           = 0;
-         ps.KernelStartPointer2           = fs_bin->kernel.offset +
-                                            wm_prog_data->prog_offset_2;
-
-         ps.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
-                                         MESA_SHADER_FRAGMENT,
-                                         wm_prog_data->base.total_scratch),
-            .offset = 0,
-         };
-         ps.PerThreadScratchSpace         = scratch_space(&wm_prog_data->base);
-
-         ps.SamplerCount                  = get_sampler_count(fs_bin);
-         ps.BindingTableEntryCount        = get_binding_table_entry_count(fs_bin);
-
-         ps.MaximumNumberofThreads        = devinfo->max_wm_threads - 1;
-         ps.PushConstantEnable            = wm_prog_data->base.nr_params > 0;
-         ps.AttributeEnable               = wm_prog_data->num_varying_inputs > 0;
-         ps.oMaskPresenttoRenderTarget    = wm_prog_data->uses_omask;
-
-         ps.RenderTargetFastClearEnable   = false;
-         ps.DualSourceBlendEnable         = false;
-         ps.RenderTargetResolveEnable     = false;
-
-         ps.PositionXYOffsetSelect        = wm_prog_data->uses_pos_offset ?
-                                            POSOFFSET_SAMPLE : POSOFFSET_NONE;
-
-         ps._32PixelDispatchEnable        = false;
-         ps._16PixelDispatchEnable        = wm_prog_data->dispatch_16;
-         ps._8PixelDispatchEnable         = wm_prog_data->dispatch_8;
-
-         ps.DispatchGRFStartRegisterForConstantSetupData0 =
-            wm_prog_data->base.dispatch_grf_start_reg,
-         ps.DispatchGRFStartRegisterForConstantSetupData1 = 0,
-         ps.DispatchGRFStartRegisterForConstantSetupData2 =
-            wm_prog_data->dispatch_grf_start_reg_2;
-
-         /* Haswell requires the sample mask to be set in this packet as well as
-          * in 3DSTATE_SAMPLE_MASK; the values should match. */
-         /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-#if GEN_IS_HASWELL
-         ps.SampleMask                    = 0xff;
-#endif
-      }
-
       uint32_t samples = pCreateInfo->pMultisampleState ?
                          pCreateInfo->pMultisampleState->rasterizationSamples : 1;
 
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index f2499dc..56eb032 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -112,49 +112,13 @@ genX(graphics_pipeline_create)(
    emit_3dstate_gs(pipeline);
    emit_3dstate_vs(pipeline);
    emit_3dstate_sbe(pipeline);
+   emit_3dstate_ps(pipeline);
 
-   const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
    if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps);
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), extra) {
          extra.PixelShaderValid = false;
       }
    } else {
-      const struct anv_shader_bin *fs_bin =
-         pipeline->shaders[MESA_SHADER_FRAGMENT];
-
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
-         ps.KernelStartPointer0     = fs_bin->kernel.offset;
-         ps.KernelStartPointer1     = 0;
-         ps.KernelStartPointer2     = fs_bin->kernel.offset +
-                                      wm_prog_data->prog_offset_2;
-         ps._8PixelDispatchEnable   = wm_prog_data->dispatch_8;
-         ps._16PixelDispatchEnable  = wm_prog_data->dispatch_16;
-         ps._32PixelDispatchEnable  = false;
-         ps.SingleProgramFlow       = false;
-         ps.VectorMaskEnable        = true;
-         ps.SamplerCount            = get_sampler_count(fs_bin);
-         ps.BindingTableEntryCount  = get_binding_table_entry_count(fs_bin);
-         ps.PushConstantEnable      = wm_prog_data->base.nr_params > 0;
-         ps.PositionXYOffsetSelect  = wm_prog_data->uses_pos_offset ?
-            POSOFFSET_SAMPLE: POSOFFSET_NONE;
-
-         ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
-
-         ps.ScratchSpaceBasePointer = (struct anv_address) {
-            .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
-                                         MESA_SHADER_FRAGMENT,
-                                         wm_prog_data->base.total_scratch),
-            .offset = 0,
-         };
-         ps.PerThreadScratchSpace   = scratch_space(&wm_prog_data->base);
-
-         ps.DispatchGRFStartRegisterForConstantSetupData0 =
-            wm_prog_data->base.dispatch_grf_start_reg;
-         ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
-         ps.DispatchGRFStartRegisterForConstantSetupData2 =
-            wm_prog_data->dispatch_grf_start_reg_2;
-      }
 
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
          ps.PixelShaderValid              = true;
diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h
index 3906529..1215e9b 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -1134,4 +1134,74 @@ emit_3dstate_gs(struct anv_pipeline *pipeline)
    }
 }
 
+static void
+emit_3dstate_ps(struct anv_pipeline *pipeline)
+{
+   MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info;
+   const struct anv_shader_bin *fs_bin =
+      pipeline->shaders[MESA_SHADER_FRAGMENT];
+
+   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
+#if GEN_GEN == 7
+         /* Even if no fragments are ever dispatched, gen7 hardware hangs if
+          * we don't at least set the maximum number of threads.
+          */
+         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
+#endif
+      }
+      return;
+   }
+
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
+      ps.KernelStartPointer0     = fs_bin->kernel.offset;
+      ps.KernelStartPointer1     = 0;
+      ps.KernelStartPointer2     = fs_bin->kernel.offset +
+                                   wm_prog_data->prog_offset_2;
+      ps._8PixelDispatchEnable   = wm_prog_data->dispatch_8;
+      ps._16PixelDispatchEnable  = wm_prog_data->dispatch_16;
+      ps._32PixelDispatchEnable  = false;
+
+      ps.SingleProgramFlow       = false;
+      ps.VectorMaskEnable        = true;
+      ps.SamplerCount            = get_sampler_count(fs_bin);
+      ps.BindingTableEntryCount  = get_binding_table_entry_count(fs_bin);
+      ps.PushConstantEnable      = wm_prog_data->base.nr_params > 0;
+      ps.PositionXYOffsetSelect  = wm_prog_data->uses_pos_offset ?
+                                   POSOFFSET_SAMPLE: POSOFFSET_NONE;
+#if GEN_GEN < 8
+      ps.AttributeEnable         = wm_prog_data->num_varying_inputs > 0;
+      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
+      ps.DualSourceBlendEnable   = wm_prog_data->dual_src_blend;
+#endif
+
+#if GEN_IS_HASWELL
+      /* Haswell requires the sample mask to be set in this packet as well
+       * as in 3DSTATE_SAMPLE_MASK; the values should match.
+       */
+      ps.SampleMask                    = 0xff;
+#endif
+
+#if GEN_GEN >= 9
+      ps.MaximumNumberofThreadsPerPSD  = 64 - 1;
+#elif GEN_GEN >= 8
+      ps.MaximumNumberofThreadsPerPSD  = 64 - 2;
+#else
+      ps.MaximumNumberofThreads        = devinfo->max_wm_threads - 1;
+#endif
+
+      ps.DispatchGRFStartRegisterForConstantSetupData0 =
+         wm_prog_data->base.dispatch_grf_start_reg;
+      ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
+      ps.DispatchGRFStartRegisterForConstantSetupData2 =
+         wm_prog_data->dispatch_grf_start_reg_2;
+
+      ps.PerThreadScratchSpace   = get_scratch_space(fs_bin);
+      ps.ScratchSpaceBasePointer =
+         get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, fs_bin);
+   }
+}
+
 #endif /* GENX_PIPELINE_UTIL_H */
-- 
2.5.0.400.gff86faf