Mesa (master): anv: prepare pipeline for delayed emission of color writes
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Apr 22 15:56:31 UTC 2021
Module: Mesa
Branch: master
Commit: 82eb7c04e7ee7b5a393550116c2e68fbdf78c3ca
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=82eb7c04e7ee7b5a393550116c2e68fbdf78c3ca
Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date: Wed Mar 31 18:50:00 2021 +0300
anv: prepare pipeline for delayed emission of color writes
Namely we want to be able to emit the following dynamically :
* On Gfx 7/7.5 : 3DSTATE_VM, 3DSTATE_BLEND_STATE_POINTERS
* On Gfx 8+ : 3DSTATE_VM, 3DSTATE_BLEND_STATE_POINTERS,
3DSTATE_PS_BLEND
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Tapani Pälli <tapani.palli at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10206>
---
src/intel/vulkan/anv_private.h | 7 ++
src/intel/vulkan/genX_pipeline.c | 217 +++++++++++++++++++++++----------------
2 files changed, 137 insertions(+), 87 deletions(-)
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index a0948832d65..f059e232351 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2239,6 +2239,7 @@ enum anv_cmd_dirty_bits {
ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
+ ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
};
typedef uint32_t anv_cmd_dirty_mask_t;
@@ -3369,6 +3370,7 @@ struct anv_graphics_pipeline {
bool sample_shading_enable;
bool kill_pixel;
bool depth_bounds_test_enable;
+ bool force_fragment_thread_dispatch;
/* When primitive replication is used, subpass->view_mask will describe what
* views to replicate.
@@ -3389,12 +3391,17 @@ struct anv_graphics_pipeline {
uint32_t depth_stencil_state[3];
uint32_t clip[4];
uint32_t xfb_bo_pitch[4];
+ uint32_t wm[3];
+ uint32_t blend_state[MAX_RTS * 2];
} gfx7;
struct {
uint32_t sf[4];
uint32_t raster[5];
uint32_t wm_depth_stencil[3];
+ uint32_t wm[2];
+ uint32_t ps_blend[2];
+ uint32_t blend_state[1 + MAX_RTS * 2];
} gfx8;
struct {
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 20edb9ddcae..f9fe6285be0 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1129,7 +1129,8 @@ is_dual_src_blend_factor(VkBlendFactor factor)
static void
emit_cb_state(struct anv_graphics_pipeline *pipeline,
const VkPipelineColorBlendStateCreateInfo *info,
- const VkPipelineMultisampleStateCreateInfo *ms_info)
+ const VkPipelineMultisampleStateCreateInfo *ms_info,
+ uint32_t dynamic_states)
{
struct anv_device *device = pipeline->base.device;
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
@@ -1150,11 +1151,21 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
const uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * surface_count;
- pipeline->blend_state =
- anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
+ uint32_t *blend_state_start, *state_pos;
+
+ if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
+ const struct intel_device_info *devinfo = &pipeline->base.device->info;
+ blend_state_start = devinfo->ver >= 8 ?
+ pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
+ pipeline->blend_state = ANV_STATE_NULL;
+ } else {
+ pipeline->blend_state =
+ anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
+ blend_state_start = pipeline->blend_state.map;
+ }
+ state_pos = blend_state_start;
bool has_writeable_rt = false;
- uint32_t *state_pos = pipeline->blend_state.map;
state_pos += GENX(BLEND_STATE_length);
#if GFX_VER >= 8
struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
@@ -1285,29 +1296,38 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
}
#if GFX_VER >= 8
- anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_BLEND), blend) {
- blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
- blend.HasWriteableRT = has_writeable_rt;
- blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
- blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
- blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
- blend.SourceBlendFactor = bs0.SourceBlendFactor;
- blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
- blend.AlphaTestEnable = false;
- blend.IndependentAlphaBlendEnable =
- blend_state.IndependentAlphaBlendEnable;
+ struct GENX(3DSTATE_PS_BLEND) blend = {
+ GENX(3DSTATE_PS_BLEND_header),
+ };
+ blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
+ blend.HasWriteableRT = has_writeable_rt;
+ blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
+ blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
+ blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
+ blend.SourceBlendFactor = bs0.SourceBlendFactor;
+ blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
+ blend.AlphaTestEnable = false;
+ blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;
+
+ if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
+ GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
+ } else {
+ anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_BLEND), _blend)
+ _blend = blend;
}
#else
(void)has_writeable_rt;
#endif
- GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
+ GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
- anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
- bsp.BlendStatePointer = pipeline->blend_state.offset;
+ if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE)) {
+ anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
+ bsp.BlendStatePointer = pipeline->blend_state.offset;
#if GFX_VER >= 8
- bsp.BlendStatePointerValid = true;
+ bsp.BlendStatePointerValid = true;
#endif
+ }
}
}
@@ -1906,87 +1926,110 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
const VkPipelineRasterizationStateCreateInfo *raster,
const VkPipelineColorBlendStateCreateInfo *blend,
const VkPipelineMultisampleStateCreateInfo *multisample,
- const VkPipelineRasterizationLineStateCreateInfoEXT *line)
+ const VkPipelineRasterizationLineStateCreateInfoEXT *line,
+ const uint32_t dynamic_states)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
- anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_WM), wm) {
- wm.StatisticsEnable = true;
- wm.LineEndCapAntialiasingRegionWidth = _05pixels;
- wm.LineAntialiasingRegionWidth = _10pixels;
- wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+ struct GENX(3DSTATE_WM) wm = {
+ GENX(3DSTATE_WM_header),
+ };
+ wm.StatisticsEnable = true;
+ wm.LineEndCapAntialiasingRegionWidth = _05pixels;
+ wm.LineAntialiasingRegionWidth = _10pixels;
+ wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
- if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
- if (wm_prog_data->early_fragment_tests) {
+ if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+ if (wm_prog_data->early_fragment_tests) {
wm.EarlyDepthStencilControl = EDSC_PREPS;
- } else if (wm_prog_data->has_side_effects) {
- wm.EarlyDepthStencilControl = EDSC_PSEXEC;
- } else {
- wm.EarlyDepthStencilControl = EDSC_NORMAL;
- }
+ } else if (wm_prog_data->has_side_effects) {
+ wm.EarlyDepthStencilControl = EDSC_PSEXEC;
+ } else {
+ wm.EarlyDepthStencilControl = EDSC_NORMAL;
+ }
#if GFX_VER >= 8
- /* Gfx8 hardware tries to compute ThreadDispatchEnable for us but
- * doesn't take into account KillPixels when no depth or stencil
- * writes are enabled. In order for occlusion queries to work
- * correctly with no attachments, we need to force-enable PS thread
- * dispatch.
- *
- * The BDW docs are pretty clear that that this bit isn't validated
- * and probably shouldn't be used in production:
- *
- * "This must always be set to Normal. This field should not be
- * tested for functional validation."
- *
- * Unfortunately, however, the other mechanism we have for doing this
- * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
- * Given two bad options, we choose the one which works.
- */
- if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
- !has_color_buffer_write_enabled(pipeline, blend))
- wm.ForceThreadDispatchEnable = ForceON;
+ /* Gen8 hardware tries to compute ThreadDispatchEnable for us but
+ * doesn't take into account KillPixels when no depth or stencil
+ * writes are enabled. In order for occlusion queries to work
+ * correctly with no attachments, we need to force-enable PS thread
+ * dispatch.
+ *
+ * The BDW docs are pretty clear that that this bit isn't validated
+ * and probably shouldn't be used in production:
+ *
+ * "This must always be set to Normal. This field should not be
+ * tested for functional validation."
+ *
+ * Unfortunately, however, the other mechanism we have for doing this
+ * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
+ * Given two bad options, we choose the one which works.
+ */
+ pipeline->force_fragment_thread_dispatch =
+ wm_prog_data->has_side_effects ||
+ wm_prog_data->uses_kill;
+
+ if (pipeline->force_fragment_thread_dispatch ||
+ !has_color_buffer_write_enabled(pipeline, blend)) {
+ /* Only set this value in non dynamic mode. */
+ wm.ForceThreadDispatchEnable =
+ !(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) ? ForceON : 0;
+ }
#endif
- wm.BarycentricInterpolationMode =
- wm_prog_data->barycentric_interp_modes;
+ wm.BarycentricInterpolationMode =
+ wm_prog_data->barycentric_interp_modes;
#if GFX_VER < 8
- wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
- wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
- wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
- wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
-
- /* If the subpass has a depth or stencil self-dependency, then we
- * need to force the hardware to do the depth/stencil write *after*
- * fragment shader execution. Otherwise, the writes may hit memory
- * before we get around to fetching from the input attachment and we
- * may get the depth or stencil value from the current draw rather
- * than the previous one.
- */
- wm.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
- wm_prog_data->uses_kill;
-
- if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
- wm_prog_data->has_side_effects ||
- wm.PixelShaderKillsPixel ||
- has_color_buffer_write_enabled(pipeline, blend))
- wm.ThreadDispatchEnable = true;
-
- if (multisample && multisample->rasterizationSamples > 1) {
- if (wm_prog_data->persample_dispatch) {
- wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
- } else {
- wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
- }
- } else {
+ wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
+ wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
+ wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
+ wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
+
+ /* If the subpass has a depth or stencil self-dependency, then we
+ * need to force the hardware to do the depth/stencil write *after*
+ * fragment shader execution. Otherwise, the writes may hit memory
+ * before we get around to fetching from the input attachment and we
+ * may get the depth or stencil value from the current draw rather
+ * than the previous one.
+ */
+ wm.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
+ wm_prog_data->uses_kill;
+
+ pipeline->force_fragment_thread_dispatch =
+ wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
+ wm_prog_data->has_side_effects ||
+ wm.PixelShaderKillsPixel;
+
+ if (pipeline->force_fragment_thread_dispatch ||
+ has_color_buffer_write_enabled(pipeline, blend)) {
+ /* Only set this value in non dynamic mode. */
+ wm.ThreadDispatchEnable = !(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
+ }
+
+ if (multisample && multisample->rasterizationSamples > 1) {
+ if (wm_prog_data->persample_dispatch) {
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
+ } else {
+ wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
}
- wm.MultisampleRasterizationMode =
- gfx7_ms_rast_mode(pipeline, ia, raster, multisample);
+ } else {
+ wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
+ }
+ wm.MultisampleRasterizationMode =
+ gfx7_ms_rast_mode(pipeline, ia, raster, multisample);
#endif
- wm.LineStippleEnable = line && line->stippledLineEnable;
- }
+ wm.LineStippleEnable = line && line->stippledLineEnable;
+ }
+
+ if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
+ const struct intel_device_info *devinfo = &pipeline->base.device->info;
+ uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
+ GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
+ } else {
+ anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_WM), _wm)
+ _wm = wm;
}
}
@@ -2307,7 +2350,7 @@ genX(graphics_pipeline_create)(
urb_deref_block_size);
emit_ms_state(pipeline, ms_info, dynamic_states);
emit_ds_state(pipeline, ds_info, dynamic_states, pass, subpass);
- emit_cb_state(pipeline, cb_info, ms_info);
+ emit_cb_state(pipeline, cb_info, ms_info, dynamic_states);
compute_kill_pixel(pipeline, ms_info, subpass);
emit_3dstate_clip(pipeline,
@@ -2347,7 +2390,7 @@ genX(graphics_pipeline_create)(
emit_3dstate_wm(pipeline, subpass,
pCreateInfo->pInputAssemblyState,
pCreateInfo->pRasterizationState,
- cb_info, ms_info, line_info);
+ cb_info, ms_info, line_info, dynamic_states);
emit_3dstate_ps(pipeline, cb_info, ms_info);
#if GFX_VER >= 8
emit_3dstate_ps_extra(pipeline, subpass,
More information about the mesa-commit
mailing list