[Mesa-dev] [PATCH 3/3] radeonsi: ensure cache flushes happen before SET_PREDICATION packets
Marek Olšák
maraeo at gmail.com
Wed Aug 30 01:16:32 UTC 2017
For the series:
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Marek
On Fri, Aug 25, 2017 at 4:40 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> The data is read when the render_cond_atom is emitted, so we must
> delay emitting the atom until after the flush.
>
> Fixes: 0fe0320dc074 ("radeonsi: use optimal packet order when doing a pipeline sync")
> ---
> src/gallium/drivers/radeon/r600_pipe_common.h | 3 ++-
> src/gallium/drivers/radeon/r600_query.c | 9 ++++++---
> src/gallium/drivers/radeonsi/si_state_draw.c | 15 ++++++++++-----
> 3 files changed, 18 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index dca56734cd7..f78e38b65af 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -54,21 +54,22 @@ struct u_log_context;
> #define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
> #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
> #define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
> #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
> #define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
>
> #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
> /* Pipeline & streamout query controls. */
> #define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
> #define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
> -#define R600_CONTEXT_PRIVATE_FLAG (1u << 3)
> +#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3)
> +#define R600_CONTEXT_PRIVATE_FLAG (1u << 4)
>
> /* special primitive types */
> #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
>
> #define R600_NOT_QUERY 0xffffffff
>
> /* Debug flags. */
> /* logging and features */
> #define DBG_TEX (1 << 0)
> #define DBG_NIR (1 << 1)
> diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
> index f937612bc1f..03ff1018a71 100644
> --- a/src/gallium/drivers/radeon/r600_query.c
> +++ b/src/gallium/drivers/radeon/r600_query.c
> @@ -1828,25 +1828,28 @@ static void r600_render_condition(struct pipe_context *ctx,
> * from launching the compute grid.
> */
> rctx->render_cond = NULL;
>
> ctx->get_query_result_resource(
> ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
> &rquery->workaround_buf->b.b, rquery->workaround_offset);
>
> /* Settings this in the render cond atom is too late,
> * so set it here. */
> - rctx->flags |= rctx->screen->barrier_flags.L2_to_cp;
> -
> - atom->num_dw = 5;
> + rctx->flags |= rctx->screen->barrier_flags.L2_to_cp |
> + R600_CONTEXT_FLUSH_FOR_RENDER_COND;
>
> rctx->render_cond_force_off = old_force_off;
> + }
> +
> + if (needs_workaround) {
> + atom->num_dw = 5;
> } else {
> for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
> atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
>
> if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
> atom->num_dw *= R600_MAX_STREAMS;
> }
> }
>
> rctx->render_cond = query;
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 1d8be49a480..81751d2186e 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1385,34 +1385,39 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
> SI_CONTEXT_PS_PARTIAL_FLUSH |
> SI_CONTEXT_CS_PARTIAL_FLUSH))) {
> /* If we have to wait for idle, set all states first, so that all
> * SET packets are processed in parallel with previous draw calls.
> * Then upload descriptors, set shader pointers, and draw, and
> * prefetch at the end. This ensures that the time the CUs
> * are idle is very short. (there are only SET_SH packets between
> * the wait and the draw)
> */
> struct r600_atom *shader_pointers = &sctx->shader_pointers.atom;
> + unsigned masked_atoms = 1u << shader_pointers->id;
>
> - /* Emit all states except shader pointers. */
> - si_emit_all_states(sctx, info, 1 << shader_pointers->id);
> + if (unlikely(sctx->b.flags & R600_CONTEXT_FLUSH_FOR_RENDER_COND))
> + masked_atoms |= 1u << sctx->b.render_cond_atom.id;
> +
> + /* Emit all states except shader pointers and render condition. */
> + si_emit_all_states(sctx, info, masked_atoms);
> si_emit_cache_flush(sctx);
>
> /* <-- CUs are idle here. */
> if (!si_upload_graphics_shader_descriptors(sctx))
> return;
>
> /* Set shader pointers after descriptors are uploaded. */
> - if (si_is_atom_dirty(sctx, shader_pointers)) {
> + if (si_is_atom_dirty(sctx, shader_pointers))
> shader_pointers->emit(&sctx->b, NULL);
> - sctx->dirty_atoms = 0;
> - }
> + if (si_is_atom_dirty(sctx, &sctx->b.render_cond_atom))
> + sctx->b.render_cond_atom.emit(&sctx->b, NULL);
> + sctx->dirty_atoms = 0;
>
> si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
> /* <-- CUs are busy here. */
>
> /* Start prefetches after the draw has been started. Both will run
> * in parallel, but starting the draw first is more important.
> */
> if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
> cik_emit_prefetch_L2(sctx);
> } else {
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list