[Mesa-dev] [PATCH v02 27/37] i965: Port gen8+ 3DSTATE_PS_EXTRA to genxml.

Rafael Antognolli rafael.antognolli at intel.com
Mon Apr 24 22:19:22 UTC 2017


Emit 3DSTATE_PS_EXTRA on Gen8+ using the brw_batch_emit helper, which uses
pack structs from genxml.

Signed-off-by: Rafael Antognolli <rafael.antognolli at intel.com>
---
 src/mesa/drivers/dri/i965/Makefile.sources    |   1 +-
 src/mesa/drivers/dri/i965/brw_state.h         |  10 +-
 src/mesa/drivers/dri/i965/gen8_ps_state.c     | 138 +-------------------
 src/mesa/drivers/dri/i965/genX_state_upload.c |  95 ++++++++++++-
 4 files changed, 94 insertions(+), 150 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/gen8_ps_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index da09df8..7f25ae1 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -109,7 +109,6 @@ i965_FILES = \
 	gen8_gs_state.c \
 	gen8_hs_state.c \
 	gen8_multisample_state.c \
-	gen8_ps_state.c \
 	gen8_surface_state.c \
 	gen8_viewport_state.c \
 	gen8_vs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 5010237..a87bf3a 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -146,7 +146,6 @@ extern const struct brw_tracked_state gen8_index_buffer;
 extern const struct brw_tracked_state gen8_multisample_state;
 extern const struct brw_tracked_state gen8_pma_fix;
 extern const struct brw_tracked_state gen8_ps_blend;
-extern const struct brw_tracked_state gen8_ps_extra;
 extern const struct brw_tracked_state gen8_sf_clip_viewport;
 extern const struct brw_tracked_state gen8_vertices;
 extern const struct brw_tracked_state gen8_vf_topology;
@@ -284,15 +283,6 @@ void brw_update_renderbuffer_surfaces(struct brw_context *brw,
 void gen7_check_surface_setup(uint32_t *surf, bool is_render_target);
 void gen7_init_vtable_surface_functions(struct brw_context *brw);
 
-/* gen8_ps_state.c */
-void gen8_upload_ps_state(struct brw_context *brw,
-                          const struct brw_stage_state *stage_state,
-                          const struct brw_wm_prog_data *prog_data,
-                          uint32_t fast_clear_op);
-
-void gen8_upload_ps_extra(struct brw_context *brw,
-                          const struct brw_wm_prog_data *prog_data);
-
 /* gen8_surface_state.c */
 
 void gen8_init_vtable_surface_functions(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
deleted file mode 100644
index 1a4a680..0000000
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <stdbool.h>
-#include "program/program.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_wm.h"
-#include "intel_batchbuffer.h"
-
-void
-gen8_upload_ps_extra(struct brw_context *brw,
-                     const struct brw_wm_prog_data *prog_data)
-{
-   struct gl_context *ctx = &brw->ctx;
-   uint32_t dw1 = 0;
-
-   dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
-   dw1 |= prog_data->computed_depth_mode << GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT;
-
-   if (prog_data->uses_kill)
-      dw1 |= GEN8_PSX_KILL_ENABLE;
-
-   if (prog_data->num_varying_inputs != 0)
-      dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
-
-   if (prog_data->uses_src_depth)
-      dw1 |= GEN8_PSX_USES_SOURCE_DEPTH;
-
-   if (prog_data->uses_src_w)
-      dw1 |= GEN8_PSX_USES_SOURCE_W;
-
-   if (prog_data->persample_dispatch)
-      dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
-
-   /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
-   if (prog_data->uses_sample_mask) {
-      if (brw->gen >= 9) {
-         if (prog_data->post_depth_coverage)
-            dw1 |= BRW_PCICMS_DEPTH << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
-         else if (prog_data->inner_coverage && ctx->IntelConservativeRasterization)
-            dw1 |= BRW_PSICMS_INNER << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
-         else
-            dw1 |= BRW_PSICMS_NORMAL << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
-      }
-      else {
-         dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
-      }
-   }
-
-   if (prog_data->uses_omask)
-      dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
-
-   if (brw->gen >= 9 && prog_data->pulls_bary)
-      dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
-
-   /* The stricter cross-primitive coherency guarantees that the hardware
-    * gives us with the "Accesses UAV" bit set for at least one shader stage
-    * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
-    * redundant within the current image, atomic counter and SSBO GL APIs,
-    * which all have very loose ordering and coherency requirements and
-    * generally rely on the application to insert explicit barriers when a
-    * shader invocation is expected to see the memory writes performed by the
-    * invocations of some previous primitive.  Regardless of the value of "UAV
-    * coherency required", the "Accesses UAV" bits will implicitly cause an in
-    * most cases useless DC flush when the lowermost stage with the bit set
-    * finishes execution.
-    *
-    * It would be nice to disable it, but in some cases we can't because on
-    * Gen8+ it also has an influence on rasterization via the PS UAV-only
-    * signal (which could be set independently from the coherency mechanism in
-    * the 3DSTATE_WM command on Gen7), and because in some cases it will
-    * determine whether the hardware skips execution of the fragment shader or
-    * not via the ThreadDispatchEnable signal.  However if we know that
-    * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
-    * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
-    * difference so we may just disable it here.
-    *
-    * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
-    * take into account KillPixels when no depth or stencil writes are enabled.
-    * In order for occlusion queries to work correctly with no attachments, we
-    * need to force-enable here.
-    *
-    * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR
-    */
-   if ((prog_data->has_side_effects || prog_data->uses_kill) &&
-       !brw_color_buffer_write_enabled(brw))
-      dw1 |= GEN8_PSX_SHADER_HAS_UAV;
-
-   if (prog_data->computed_stencil) {
-      assert(brw->gen >= 9);
-      dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
-   }
-
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
-   OUT_BATCH(dw1);
-   ADVANCE_BATCH();
-}
-
-static void
-upload_ps_extra(struct brw_context *brw)
-{
-   /* BRW_NEW_FS_PROG_DATA */
-   gen8_upload_ps_extra(brw, brw_wm_prog_data(brw->wm.base.prog_data));
-}
-
-const struct brw_tracked_state gen8_ps_extra = {
-   .dirty = {
-      .mesa  = _NEW_BUFFERS | _NEW_COLOR,
-      .brw   = BRW_NEW_BLORP |
-               BRW_NEW_CONTEXT |
-               BRW_NEW_FRAGMENT_PROGRAM |
-               BRW_NEW_FS_PROG_DATA |
-               BRW_NEW_CONSERVATIVE_RASTERIZATION,
-   },
-   .emit = upload_ps_extra,
-};
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 0f7a222..7ed79b2 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1668,6 +1668,99 @@ static const struct brw_tracked_state genX(raster_state) = {
    },
    .emit = genX(upload_raster),
 };
+
+/* ---------------------------------------------------------------------- */
+
+static void
+genX(upload_ps_extra)(struct brw_context *brw)
+{
+   const struct brw_wm_prog_data *prog_data =
+      brw_wm_prog_data(brw->wm.base.prog_data);
+
+   brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), pse) {
+      pse.PixelShaderValid = true;
+      pse.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
+      pse.PixelShaderKillsPixel = prog_data->uses_kill;
+      pse.AttributeEnable = prog_data->num_varying_inputs != 0;
+      pse.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
+      pse.PixelShaderUsesSourceW = prog_data->uses_src_w;
+      pse.PixelShaderIsPerSample = prog_data->persample_dispatch;
+
+      /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
+      if (prog_data->uses_sample_mask) {
+#if GEN_GEN >= 9
+         struct gl_context *ctx = &brw->ctx;
+
+         if (prog_data->post_depth_coverage)
+            pse.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
+         else if (prog_data->inner_coverage && ctx->IntelConservativeRasterization)
+            pse.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
+         else
+            pse.InputCoverageMaskState = ICMS_NORMAL;
+#else /* GEN_GEN < 9 */
+         pse.PixelShaderUsesInputCoverageMask = true;
+#endif /* GEN_GEN >= 9 */
+      }
+
+      pse.oMaskPresenttoRenderTarget = prog_data->uses_omask;
+#if GEN_GEN >= 9
+      pse.PixelShaderPullsBary = prog_data->pulls_bary;
+#endif
+
+      /* The stricter cross-primitive coherency guarantees that the hardware
+       * gives us with the "Accesses UAV" bit set for at least one shader stage
+       * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
+       * are redundant within the current image, atomic counter and SSBO GL
+       * APIs, which all have very loose ordering and coherency requirements
+       * and generally rely on the application to insert explicit barriers when
+       * a shader invocation is expected to see the memory writes performed by
+       * the invocations of some previous primitive.  Regardless of the value
+       * of "UAV coherency required", the "Accesses UAV" bits will implicitly
+       * cause an in most cases useless DC flush when the lowermost stage with
+       * the bit set finishes execution.
+       *
+       * It would be nice to disable it, but in some cases we can't because on
+       * Gen8+ it also has an influence on rasterization via the PS UAV-only
+       * signal (which could be set independently from the coherency mechanism
+       * in the 3DSTATE_WM command on Gen7), and because in some cases it will
+       * determine whether the hardware skips execution of the fragment shader
+       * or not via the ThreadDispatchEnable signal.  However if we know that
+       * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
+       * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
+       * difference so we may just disable it here.
+       *
+       * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
+       * take into account KillPixels when no depth or stencil writes are
+       * enabled.  In order for occlusion queries to work correctly with no
+       * attachments, we need to force-enable here.
+       *
+       * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
+       * _NEW_COLOR
+       */
+      if ((prog_data->has_side_effects || prog_data->uses_kill) &&
+          !brw_color_buffer_write_enabled(brw))
+         pse.PixelShaderHasUAV = true;
+
+      if (prog_data->computed_stencil) {
+         assert(brw->gen >= 9);
+#if GEN_GEN >= 9
+         pse.PixelShaderComputesStencil = true;
+#endif
+      }
+   }
+}
+
+const struct brw_tracked_state genX(ps_extra) = {
+   .dirty = {
+      .mesa  = _NEW_BUFFERS | _NEW_COLOR, /* color write-enable check */
+      .brw   = BRW_NEW_BLORP |
+               BRW_NEW_CONTEXT |
+               BRW_NEW_FRAGMENT_PROGRAM |
+               BRW_NEW_FS_PROG_DATA | /* brw->wm.base.prog_data */
+               BRW_NEW_CONSERVATIVE_RASTERIZATION, /* Gen9+ coverage mode */
+   },
+   .emit = genX(upload_ps_extra), /* emits 3DSTATE_PS_EXTRA */
+};
 #endif
 
 /* ---------------------------------------------------------------------- */
@@ -1957,7 +2050,7 @@ genX(init_atoms)(struct brw_context *brw)
       &genX(sbe_state),
       &genX(sf_state),
       &gen8_ps_blend,
-      &gen8_ps_extra,
+      &genX(ps_extra),
       &genX(ps_state),
       &genX(depth_stencil_state),
       &genX(wm_state),
-- 
git-series 0.9.1


More information about the mesa-dev mailing list