[Mesa-dev] [PATCH 3/3] radeonsi: ensure cache flushes happen before SET_PREDICATION packets
Nicolai Hähnle
nhaehnle at gmail.com
Fri Aug 25 14:40:46 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
The predication data is read when the render_cond_atom is emitted, so we
must delay emitting the atom until after the cache flush.
Fixes: 0fe0320dc074 ("radeonsi: use optimal packet order when doing a pipeline sync")
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 ++-
 src/gallium/drivers/radeon/r600_query.c       |  9 ++++++---
 src/gallium/drivers/radeonsi/si_state_draw.c  | 15 ++++++++++-----
 3 files changed, 18 insertions(+), 9 deletions(-)
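
Note for readers not familiar with the draw path: below is a small
standalone sketch (illustrative only, not part of the patch) of the
ordering this change enforces. The render-condition atom turns into a
SET_PREDICATION packet that reads the query workaround buffer, so it must
be emitted after the cache flush rather than together with the other state
atoms. All names in the sketch (emit_atom, emit_cache_flush, draw, the
ATOM_* ids) are invented for illustration and are not the radeonsi API.

/* Illustrative model of the packet order when a pipeline sync is needed. */
#include <stdbool.h>
#include <stdio.h>

enum { ATOM_SHADER_POINTERS, ATOM_RENDER_COND, NUM_ATOMS };

static bool dirty[NUM_ATOMS];
static bool flush_needed_for_render_cond;

static void emit_atom(int id)
{
	printf("emit atom %d\n", id);
	dirty[id] = false;
}

static void emit_cache_flush(void)
{
	printf("emit cache flush (L2 -> CP)\n");
	flush_needed_for_render_cond = false;
}

static void draw(void)
{
	/* Emit all dirty state atoms except the ones that must come after
	 * the flush: shader pointers and, when the predication data still
	 * needs an L2 -> CP flush, the render condition. */
	for (int i = 0; i < NUM_ATOMS; i++) {
		if (i == ATOM_SHADER_POINTERS)
			continue;
		if (i == ATOM_RENDER_COND && flush_needed_for_render_cond)
			continue;
		if (dirty[i])
			emit_atom(i);
	}

	emit_cache_flush();
	/* <-- only now is it safe for SET_PREDICATION to read the buffer. */

	if (dirty[ATOM_SHADER_POINTERS])
		emit_atom(ATOM_SHADER_POINTERS);
	if (dirty[ATOM_RENDER_COND])
		emit_atom(ATOM_RENDER_COND);

	printf("emit draw packets\n");
}

int main(void)
{
	dirty[ATOM_SHADER_POINTERS] = true;
	dirty[ATOM_RENDER_COND] = true;
	flush_needed_for_render_cond = true;
	draw();
	return 0;
}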
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index dca56734cd7..f78e38b65af 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -54,21 +54,22 @@ struct u_log_context;
#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
/* Pipeline & streamout query controls. */
#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
-#define R600_CONTEXT_PRIVATE_FLAG (1u << 3)
+#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3)
+#define R600_CONTEXT_PRIVATE_FLAG (1u << 4)
/* special primitive types */
#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
#define R600_NOT_QUERY 0xffffffff
/* Debug flags. */
/* logging and features */
#define DBG_TEX (1 << 0)
#define DBG_NIR (1 << 1)
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index f937612bc1f..03ff1018a71 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1828,25 +1828,28 @@ static void r600_render_condition(struct pipe_context *ctx,
* from launching the compute grid.
*/
rctx->render_cond = NULL;
ctx->get_query_result_resource(
ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
&rquery->workaround_buf->b.b, rquery->workaround_offset);
/* Settings this in the render cond atom is too late,
* so set it here. */
- rctx->flags |= rctx->screen->barrier_flags.L2_to_cp;
-
- atom->num_dw = 5;
+ rctx->flags |= rctx->screen->barrier_flags.L2_to_cp |
+ R600_CONTEXT_FLUSH_FOR_RENDER_COND;
rctx->render_cond_force_off = old_force_off;
+ }
+
+ if (needs_workaround) {
+ atom->num_dw = 5;
} else {
for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
atom->num_dw *= R600_MAX_STREAMS;
}
}
rctx->render_cond = query;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 1d8be49a480..81751d2186e 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1385,34 +1385,39 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
SI_CONTEXT_PS_PARTIAL_FLUSH |
SI_CONTEXT_CS_PARTIAL_FLUSH))) {
/* If we have to wait for idle, set all states first, so that all
* SET packets are processed in parallel with previous draw calls.
* Then upload descriptors, set shader pointers, and draw, and
* prefetch at the end. This ensures that the time the CUs
* are idle is very short. (there are only SET_SH packets between
* the wait and the draw)
*/
struct r600_atom *shader_pointers = &sctx->shader_pointers.atom;
+ unsigned masked_atoms = 1u << shader_pointers->id;
- /* Emit all states except shader pointers. */
- si_emit_all_states(sctx, info, 1 << shader_pointers->id);
+ if (unlikely(sctx->b.flags & R600_CONTEXT_FLUSH_FOR_RENDER_COND))
+ masked_atoms |= 1u << sctx->b.render_cond_atom.id;
+
+ /* Emit all states except shader pointers and render condition. */
+ si_emit_all_states(sctx, info, masked_atoms);
si_emit_cache_flush(sctx);
/* <-- CUs are idle here. */
if (!si_upload_graphics_shader_descriptors(sctx))
return;
/* Set shader pointers after descriptors are uploaded. */
- if (si_is_atom_dirty(sctx, shader_pointers)) {
+ if (si_is_atom_dirty(sctx, shader_pointers))
shader_pointers->emit(&sctx->b, NULL);
- sctx->dirty_atoms = 0;
- }
+ if (si_is_atom_dirty(sctx, &sctx->b.render_cond_atom))
+ sctx->b.render_cond_atom.emit(&sctx->b, NULL);
+ sctx->dirty_atoms = 0;
si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
/* <-- CUs are busy here. */
/* Start prefetches after the draw has been started. Both will run
* in parallel, but starting the draw first is more important.
*/
if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx);
} else {
--
2.11.0