[Mesa-dev] [PATCH 2/2] anv/pipeline: Use ForceThreadDispatchEnable instead of AccessUAV.
Jason Ekstrand
jason at jlekstrand.net
Tue Feb 14 01:28:31 UTC 2017
The AccessUAV bit is not quite what we want because it's more about
coherency between storage operations than it is about dispatch. Also,
the 3DSTATE_PS_EXTRA::PixelShaderHasUAV bit seems to cause hangs on
Broadwell for unknown reasons so it's best to just leave it off. The
3DSTATE_WM::ForceThreadDispatchEnable bit, on the other hand, does
exactly what we want and seems to work fine.
This fixes GPU hangs with Dota 2 on Broadwell
---
src/intel/vulkan/genX_pipeline.c | 86 ++++++++++++++++------------------------
1 file changed, 35 insertions(+), 51 deletions(-)
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 752661e..ffb6e38 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1285,6 +1285,26 @@ emit_3dstate_gs(struct anv_pipeline *pipeline)
}
}
+static inline bool
+has_color_buffer_write_enabled(const struct anv_pipeline *pipeline)
+{
+ const struct anv_shader_bin *shader_bin =
+ pipeline->shaders[MESA_SHADER_FRAGMENT];
+ if (!shader_bin)
+ return false;
+
+ const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map;
+ for (int i = 0; i < bind_map->surface_count; i++) {
+ if (bind_map->surface_to_descriptor[i].set !=
+ ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
+ continue;
+ if (bind_map->surface_to_descriptor[i].index != UINT8_MAX)
+ return true;
+ }
+
+ return false;
+}
+
static void
emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass,
const VkPipelineMultisampleStateCreateInfo *multisample)
@@ -1312,6 +1332,21 @@ emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass,
wm.BarycentricInterpolationMode =
wm_prog_data->barycentric_interp_modes;
+#if GEN_GEN >= 8
+ /* In most cases, the computation for the WM_INT::ThreadDispatchEnable
+ * does exactly what you want. However, when you don't have any color
+ * buffers or when depth/stencil writes are disabled, it misses a few
+ * cases. In those cases, we force it to dispatch the PS by using the
+ * 3DSTATE_WM::ForceThreadDispatchEnable bit. According to the docs,
+ * this bit is not validated and shouldn't be used. However, it seems
+ * to work fine and this is exactly the type of thing you would use such
+ * a bit for.
+ */
+ if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
+ !has_color_buffer_write_enabled(pipeline))
+ wm.ForceThreadDispatchEnable = ForceON;
+#endif
+
#if GEN_GEN < 8
/* FIXME: This needs a lot more work, cf gen7 upload_wm_state(). */
wm.ThreadDispatchEnable = true;
@@ -1449,26 +1484,6 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
}
}
-static inline bool
-has_color_buffer_write_enabled(const struct anv_pipeline *pipeline)
-{
- const struct anv_shader_bin *shader_bin =
- pipeline->shaders[MESA_SHADER_FRAGMENT];
- if (!shader_bin)
- return false;
-
- const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map;
- for (int i = 0; i < bind_map->surface_count; i++) {
- if (bind_map->surface_to_descriptor[i].set !=
- ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
- continue;
- if (bind_map->surface_to_descriptor[i].index != UINT8_MAX)
- return true;
- }
-
- return false;
-}
-
#if GEN_GEN >= 8
static void
emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
@@ -1499,37 +1514,6 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
ps.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
wm_prog_data->uses_kill;
- /* The stricter cross-primitive coherency guarantees that the hardware
- * gives us with the "Accesses UAV" bit set for at least one shader stage
- * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
- * redundant within the current image, atomic counter and SSBO GL APIs,
- * which all have very loose ordering and coherency requirements and
- * generally rely on the application to insert explicit barriers when a
- * shader invocation is expected to see the memory writes performed by the
- * invocations of some previous primitive. Regardless of the value of
- * "UAV coherency required", the "Accesses UAV" bits will implicitly cause
- * an in most cases useless DC flush when the lowermost stage with the bit
- * set finishes execution.
- *
- * It would be nice to disable it, but in some cases we can't because on
- * Gen8+ it also has an influence on rasterization via the PS UAV-only
- * signal (which could be set independently from the coherency mechanism
- * in the 3DSTATE_WM command on Gen7), and because in some cases it will
- * determine whether the hardware skips execution of the fragment shader
- * or not via the ThreadDispatchEnable signal. However if we know that
- * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
- * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
- * difference so we may just disable it here.
- *
- * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
- * take into account KillPixels when no depth or stencil writes are
- * enabled. In order for occlusion queries to work correctly with no
- * attachments, we need to force-enable here.
- */
- if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
- !has_color_buffer_write_enabled(pipeline))
- ps.PixelShaderHasUAV = true;
-
#if GEN_GEN >= 9
ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;
ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ?
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list