[Mesa-dev] [PATCH v02 27/37] i965: Port gen8+ 3DSTATE_PS_EXTRA to genxml.
Rafael Antognolli
rafael.antognolli at intel.com
Mon Apr 24 22:19:22 UTC 2017
Emit 3DSTATE_PS_EXTRA on Gen8+ using brw_batch_emit helper, that uses
pack structs from genxml.
Signed-off-by: Rafael Antognolli <rafael.antognolli at intel.com>
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +-
src/mesa/drivers/dri/i965/brw_state.h | 10 +-
src/mesa/drivers/dri/i965/gen8_ps_state.c | 138 +-------------------
src/mesa/drivers/dri/i965/genX_state_upload.c | 95 ++++++++++++-
4 files changed, 94 insertions(+), 150 deletions(-)
delete mode 100644 src/mesa/drivers/dri/i965/gen8_ps_state.c
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index da09df8..7f25ae1 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -109,7 +109,6 @@ i965_FILES = \
gen8_gs_state.c \
gen8_hs_state.c \
gen8_multisample_state.c \
- gen8_ps_state.c \
gen8_surface_state.c \
gen8_viewport_state.c \
gen8_vs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 5010237..a87bf3a 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -146,7 +146,6 @@ extern const struct brw_tracked_state gen8_index_buffer;
extern const struct brw_tracked_state gen8_multisample_state;
extern const struct brw_tracked_state gen8_pma_fix;
extern const struct brw_tracked_state gen8_ps_blend;
-extern const struct brw_tracked_state gen8_ps_extra;
extern const struct brw_tracked_state gen8_sf_clip_viewport;
extern const struct brw_tracked_state gen8_vertices;
extern const struct brw_tracked_state gen8_vf_topology;
@@ -284,15 +283,6 @@ void brw_update_renderbuffer_surfaces(struct brw_context *brw,
void gen7_check_surface_setup(uint32_t *surf, bool is_render_target);
void gen7_init_vtable_surface_functions(struct brw_context *brw);
-/* gen8_ps_state.c */
-void gen8_upload_ps_state(struct brw_context *brw,
- const struct brw_stage_state *stage_state,
- const struct brw_wm_prog_data *prog_data,
- uint32_t fast_clear_op);
-
-void gen8_upload_ps_extra(struct brw_context *brw,
- const struct brw_wm_prog_data *prog_data);
-
/* gen8_surface_state.c */
void gen8_init_vtable_surface_functions(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
deleted file mode 100644
index 1a4a680..0000000
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <stdbool.h>
-#include "program/program.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_wm.h"
-#include "intel_batchbuffer.h"
-
-void
-gen8_upload_ps_extra(struct brw_context *brw,
- const struct brw_wm_prog_data *prog_data)
-{
- struct gl_context *ctx = &brw->ctx;
- uint32_t dw1 = 0;
-
- dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
- dw1 |= prog_data->computed_depth_mode << GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT;
-
- if (prog_data->uses_kill)
- dw1 |= GEN8_PSX_KILL_ENABLE;
-
- if (prog_data->num_varying_inputs != 0)
- dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
-
- if (prog_data->uses_src_depth)
- dw1 |= GEN8_PSX_USES_SOURCE_DEPTH;
-
- if (prog_data->uses_src_w)
- dw1 |= GEN8_PSX_USES_SOURCE_W;
-
- if (prog_data->persample_dispatch)
- dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
-
- /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
- if (prog_data->uses_sample_mask) {
- if (brw->gen >= 9) {
- if (prog_data->post_depth_coverage)
- dw1 |= BRW_PCICMS_DEPTH << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
- else if (prog_data->inner_coverage && ctx->IntelConservativeRasterization)
- dw1 |= BRW_PSICMS_INNER << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
- else
- dw1 |= BRW_PSICMS_NORMAL << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
- }
- else {
- dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
- }
- }
-
- if (prog_data->uses_omask)
- dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
-
- if (brw->gen >= 9 && prog_data->pulls_bary)
- dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
-
- /* The stricter cross-primitive coherency guarantees that the hardware
- * gives us with the "Accesses UAV" bit set for at least one shader stage
- * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
- * redundant within the current image, atomic counter and SSBO GL APIs,
- * which all have very loose ordering and coherency requirements and
- * generally rely on the application to insert explicit barriers when a
- * shader invocation is expected to see the memory writes performed by the
- * invocations of some previous primitive. Regardless of the value of "UAV
- * coherency required", the "Accesses UAV" bits will implicitly cause an in
- * most cases useless DC flush when the lowermost stage with the bit set
- * finishes execution.
- *
- * It would be nice to disable it, but in some cases we can't because on
- * Gen8+ it also has an influence on rasterization via the PS UAV-only
- * signal (which could be set independently from the coherency mechanism in
- * the 3DSTATE_WM command on Gen7), and because in some cases it will
- * determine whether the hardware skips execution of the fragment shader or
- * not via the ThreadDispatchEnable signal. However if we know that
- * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
- * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
- * difference so we may just disable it here.
- *
- * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
- * take into account KillPixels when no depth or stencil writes are enabled.
- * In order for occlusion queries to work correctly with no attachments, we
- * need to force-enable here.
- *
- * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR
- */
- if ((prog_data->has_side_effects || prog_data->uses_kill) &&
- !brw_color_buffer_write_enabled(brw))
- dw1 |= GEN8_PSX_SHADER_HAS_UAV;
-
- if (prog_data->computed_stencil) {
- assert(brw->gen >= 9);
- dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
- }
-
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
- OUT_BATCH(dw1);
- ADVANCE_BATCH();
-}
-
-static void
-upload_ps_extra(struct brw_context *brw)
-{
- /* BRW_NEW_FS_PROG_DATA */
- gen8_upload_ps_extra(brw, brw_wm_prog_data(brw->wm.base.prog_data));
-}
-
-const struct brw_tracked_state gen8_ps_extra = {
- .dirty = {
- .mesa = _NEW_BUFFERS | _NEW_COLOR,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_CONSERVATIVE_RASTERIZATION,
- },
- .emit = upload_ps_extra,
-};
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 0f7a222..7ed79b2 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1668,6 +1668,99 @@ static const struct brw_tracked_state genX(raster_state) = {
},
.emit = genX(upload_raster),
};
+
+/* ---------------------------------------------------------------------- */
+
+static void
+genX(upload_ps_extra)(struct brw_context *brw)
+{
+ const struct brw_wm_prog_data *prog_data =
+ brw_wm_prog_data(brw->wm.base.prog_data);
+
+ brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), pse) {
+ pse.PixelShaderValid = true;
+ pse.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
+ pse.PixelShaderKillsPixel = prog_data->uses_kill;
+ pse.AttributeEnable = prog_data->num_varying_inputs != 0;
+ pse.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
+ pse.PixelShaderUsesSourceW = prog_data->uses_src_w;
+ pse.PixelShaderIsPerSample = prog_data->persample_dispatch;
+
+ /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
+ if (prog_data->uses_sample_mask) {
+#if GEN_GEN >= 9
+ struct gl_context *ctx = &brw->ctx;
+
+ if (prog_data->post_depth_coverage)
+ pse.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
+ else if (prog_data->inner_coverage && ctx->IntelConservativeRasterization)
+ pse.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
+ else
+ pse.InputCoverageMaskState = ICMS_NORMAL;
+#else
+ pse.PixelShaderUsesInputCoverageMask = true;
+#endif
+ }
+
+ pse.oMaskPresenttoRenderTarget = prog_data->uses_omask;
+#if GEN_GEN >= 9
+ pse.PixelShaderPullsBary = prog_data->pulls_bary;
+#endif
+
+ /* The stricter cross-primitive coherency guarantees that the hardware
+ * gives us with the "Accesses UAV" bit set for at least one shader stage
+ * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
+ * are redundant within the current image, atomic counter and SSBO GL
+ * APIs, which all have very loose ordering and coherency requirements
+ * and generally rely on the application to insert explicit barriers when
+ * a shader invocation is expected to see the memory writes performed by
+ * the invocations of some previous primitive. Regardless of the value
+ * of "UAV coherency required", the "Accesses UAV" bits will implicitly
+ * cause an in most cases useless DC flush when the lowermost stage with
+ * the bit set finishes execution.
+ *
+ * It would be nice to disable it, but in some cases we can't because on
+ * Gen8+ it also has an influence on rasterization via the PS UAV-only
+ * signal (which could be set independently from the coherency mechanism
+ * in the 3DSTATE_WM command on Gen7), and because in some cases it will
+ * determine whether the hardware skips execution of the fragment shader
+ * or not via the ThreadDispatchEnable signal. However if we know that
+ * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
+ * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
+ * difference so we may just disable it here.
+ *
+ * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
+ * take into account KillPixels when no depth or stencil writes are
+ * enabled. In order for occlusion queries to work correctly with no
+ * attachments, we need to force-enable here.
+ *
+ * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
+ * _NEW_COLOR
+ */
+ if ((prog_data->has_side_effects || prog_data->uses_kill) &&
+ !brw_color_buffer_write_enabled(brw))
+ pse.PixelShaderHasUAV = true;;
+
+ if (prog_data->computed_stencil) {
+ assert(brw->gen >= 9);
+#if GEN_GEN >= 9
+ pse.PixelShaderComputesStencil = true;
+#endif
+ }
+ }
+}
+
+const struct brw_tracked_state genX(ps_extra) = {
+ .dirty = {
+ .mesa = _NEW_BUFFERS | _NEW_COLOR,
+ .brw = BRW_NEW_BLORP |
+ BRW_NEW_CONTEXT |
+ BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_FS_PROG_DATA |
+ BRW_NEW_CONSERVATIVE_RASTERIZATION,
+ },
+ .emit = genX(upload_ps_extra),
+};
#endif
/* ---------------------------------------------------------------------- */
@@ -1957,7 +2050,7 @@ genX(init_atoms)(struct brw_context *brw)
&genX(sbe_state),
&genX(sf_state),
&gen8_ps_blend,
- &gen8_ps_extra,
+ &genX(ps_extra),
&genX(ps_state),
&genX(depth_stencil_state),
&genX(wm_state),
--
git-series 0.9.1
More information about the mesa-dev
mailing list