[Mesa-dev] [PATCH] r600/eg: rework atomic counter emission with flushes
Gert Wollny
gw.fossdev at gmail.com
Tue Aug 7 21:25:00 UTC 2018
I have not yet read the patch, just applied it and run, at one point it
locked up again, blancking the screen, and these piglits running:
arb_compute_shader-local-id -auto -fbo
arb_shader_image_load_store-bitcast -auto -fbo
arb_shader_image_load_store-max-images -auto -fbo
arb_shader_image_load_store-max-size --quick -auto -fbo
arb_shader_image_load_store-minmax -auto -fbo
arb_shader_image_load_store-qualifiers -auto -fbo
I couldn't kill the -bitcast process, didn't try the -local-id process,
but starting with -max-size I was able to kill -9 some tests and then
it resolved itself and piglit continued to run to the end.
I have to retry whether I can do the same without this patch, I thinkat least once it worked for me.
Best,
Gert
Am Dienstag, den 07.08.2018, 12:31 +1000 schrieb Dave Airlie:
> From: Dave Airlie <airlied at redhat.com>
>
> With the current code, we didn't do the space checks prior
> to atomic counter setup emission, but we also didn't add
> atomic counters to the space check so we could get a flush
> later as well.
>
> These flushes would be bad, and lead to problems with
> parallel tests. We have to ensure the atomic counter copy in,
> draw emits and counter copy out are kept in the same command
> submission unit.
>
> This reworks the code to drop some useless masks, make the
> counting separate to the emits, and make the space checker
> handle atomic counter space.
> ---
> src/gallium/drivers/r600/evergreen_compute.c | 11 ++++--
> .../drivers/r600/evergreen_hw_context.c | 2 +-
> src/gallium/drivers/r600/evergreen_state.c | 38 +++++++++++----
> ----
> src/gallium/drivers/r600/r600_hw_context.c | 7 +++-
> src/gallium/drivers/r600/r600_pipe.h | 14 ++++---
> src/gallium/drivers/r600/r600_state_common.c | 13 +++++--
> 6 files changed, 54 insertions(+), 31 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c
> b/src/gallium/drivers/r600/evergreen_compute.c
> index 90eae1e2829..a77f58242e3 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context
> *rctx,
> rctx->cmd_buf_is_compute = true;
> }
>
> - r600_need_cs_space(rctx, 0, true);
> if (rctx->cs_shader_state.shader->ir_type ==
> PIPE_SHADER_IR_TGSI) {
> r600_shader_select(&rctx->b.b, rctx-
> >cs_shader_state.shader->sel, &compute_dirty);
> current = rctx->cs_shader_state.shader->sel-
> >current;
> @@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context
> *rctx,
> }
> rctx->cs_block_grid_sizes[3] = rctx-
> >cs_block_grid_sizes[7] = 0;
> rctx-
> >driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
> +
> + evergreen_emit_atomic_buffer_setup_count(rctx,
> current, combined_atomics, &atomic_used_mask);
> + r600_need_cs_space(rctx, 0, true,
> util_bitcount(atomic_used_mask));
> +
> if (need_buf_const) {
> eg_setup_buffer_constants(rctx,
> PIPE_SHADER_COMPUTE);
> }
> r600_update_driver_const_buffers(rctx, true);
>
> - if (evergreen_emit_atomic_buffer_setup(rctx,
> current, combined_atomics, &atomic_used_mask)) {
> + evergreen_emit_atomic_buffer_setup(rctx, true,
> combined_atomics, atomic_used_mask);
> + if (atomic_used_mask) {
> radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0,
> 0));
> radeon_emit(cs,
> EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
> }
> - }
> + } else
> + r600_need_cs_space(rctx, 0, true, 0);
>
> /* Initialize all the compute-related registers.
> *
> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c
> b/src/gallium/drivers/r600/evergreen_hw_context.c
> index d3f3e227c1f..5e0e27b0f16 100644
> --- a/src/gallium/drivers/r600/evergreen_hw_context.c
> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c
> @@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct
> r600_context *rctx,
>
> r600_need_cs_space(rctx,
> 10 + (rctx->b.flags ?
> R600_MAX_FLUSH_CS_DWORDS : 0) +
> - R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE);
> + R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE, 0);
>
> /* Flush the caches for the first copy only. */
> if (rctx->b.flags) {
> diff --git a/src/gallium/drivers/r600/evergreen_state.c
> b/src/gallium/drivers/r600/evergreen_state.c
> index 57e81e30c27..cc41e114369 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -4030,7 +4030,6 @@ static void
> evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
>
> if (!buffers || !buffers[idx].buffer) {
> pipe_resource_reference(&abuf->buffer,
> NULL);
> - astate->enabled_mask &= ~(1 << i);
> continue;
> }
> buf = &buffers[idx];
> @@ -4038,7 +4037,6 @@ static void
> evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
> pipe_resource_reference(&abuf->buffer, buf->buffer);
> abuf->buffer_offset = buf->buffer_offset;
> abuf->buffer_size = buf->buffer_size;
> - astate->enabled_mask |= (1 << i);
> }
> }
>
> @@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct
> r600_context *rctx,
> radeon_emit(cs, reloc);
> }
>
> -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> - struct r600_pipe_shader
> *cs_shader,
> - struct r600_shader_atomic
> *combined_atomics,
> - uint8_t *atomic_used_mask_p)
> +void evergreen_emit_atomic_buffer_setup_count(struct r600_context
> *rctx,
> + struct
> r600_pipe_shader *cs_shader,
> + struct
> r600_shader_atomic *combined_atomics,
> + uint8_t
> *atomic_used_mask_p)
> {
> - struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
> - unsigned pkt_flags = 0;
> uint8_t atomic_used_mask = 0;
> int i, j, k;
> bool is_compute = cs_shader ? true : false;
>
> - if (is_compute)
> - pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
> -
> for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
> uint8_t num_atomic_stage;
> struct r600_pipe_shader *pshader;
> @@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct
> r600_context *rctx,
> }
> }
> }
> + *atomic_used_mask_p = atomic_used_mask;
> +}
> +
> +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> + bool is_compute,
> + struct r600_shader_atomic
> *combined_atomics,
> + uint8_t atomic_used_mask)
> +{
> + struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
> + unsigned pkt_flags = 0;
> + uint32_t mask;
> +
> + if (is_compute)
> + pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
> +
> + mask = atomic_used_mask;
> + if (!mask)
> + return;
>
> - uint32_t mask = atomic_used_mask;
> while (mask) {
> unsigned atomic_index = u_bit_scan(&mask);
> struct r600_shader_atomic *atomic =
> &combined_atomics[atomic_index];
> @@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct
> r600_context *rctx,
> else
> evergreen_emit_set_append_cnt(rctx, atomic,
> resource, pkt_flags);
> }
> - *atomic_used_mask_p = atomic_used_mask;
> - return true;
> }
>
> void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
> @@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct
> r600_context *rctx,
> struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
> uint32_t pkt_flags = 0;
> uint32_t event = EVENT_TYPE_PS_DONE;
> - uint32_t mask = astate->enabled_mask;
> + uint32_t mask;
> uint64_t dst_offset;
> unsigned reloc;
>
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c
> b/src/gallium/drivers/r600/r600_hw_context.c
> index 1cfc180ad6c..a2f5f637b20 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -31,7 +31,7 @@
>
>
> void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> - boolean count_draw_in)
> + boolean count_draw_in, unsigned num_atomics)
> {
> /* Flush the DMA IB if it's not empty. */
> if (radeon_emitted(ctx->b.dma.cs, 0))
> @@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx,
> unsigned num_dw,
> num_dw += R600_MAX_FLUSH_CS_DWORDS +
> R600_MAX_DRAW_CS_DWORDS;
> }
>
> + /* add atomic counters, 8 pre + 8 post per counter + 16 post
> if any counters */
> + num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
> +
> /* Count in r600_suspend_queries. */
> num_dw += ctx->b.num_cs_dw_queries_suspend;
>
> @@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context
> *rctx,
>
> r600_need_cs_space(rctx,
> 10 + (rctx->b.flags ?
> R600_MAX_FLUSH_CS_DWORDS : 0) +
> - 3 + R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE);
> + 3 + R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE, 0);
>
> /* Flush the caches for the first copy only. */
> if (rctx->b.flags) {
> diff --git a/src/gallium/drivers/r600/r600_pipe.h
> b/src/gallium/drivers/r600/r600_pipe.h
> index 6204e3c557b..239005cab7f 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -446,8 +446,6 @@ struct r600_shader_state {
> };
>
> struct r600_atomic_buffer_state {
> - uint32_t enabled_mask;
> - uint32_t dirty_mask;
> struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
> };
>
> @@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context,
> unsigned flags,
> struct pipe_fence_handle **fence);
> void r600_begin_new_cs(struct r600_context *ctx);
> void r600_flush_emit(struct r600_context *ctx);
> -void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> boolean count_draw_in);
> +void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> boolean count_draw_in, unsigned num_atomics);
> void r600_emit_pfp_sync_me(struct r600_context *rctx);
> void r600_cp_dma_copy_buffer(struct r600_context *rctx,
> struct pipe_resource *dst, uint64_t
> dst_offset,
> @@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct
> pipe_context *ctx,
> struct r600_pipe_shader_selector
> *sel);
>
> struct r600_shader_atomic;
> -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> - struct r600_pipe_shader
> *cs_shader,
> +void evergreen_emit_atomic_buffer_setup_count(struct r600_context
> *rctx,
> + struct
> r600_pipe_shader *cs_shader,
> + struct
> r600_shader_atomic *combined_atomics,
> + uint8_t
> *atomic_used_mask_p);
> +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> + bool is_compute,
> struct r600_shader_atomic
> *combined_atomics,
> - uint8_t
> *atomic_used_mask_p);
> + uint8_t atomic_used_mask);
> void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
> bool is_compute,
> struct r600_shader_atomic
> *combined_atomics,
> diff --git a/src/gallium/drivers/r600/r600_state_common.c
> b/src/gallium/drivers/r600/r600_state_common.c
> index 402d95838f0..e6c1b0be97c 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
> : (rctx->tes_shader)? rctx->tes_shader-
> >info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
> : info->mode;
>
> - if (rctx->b.chip_class >= EVERGREEN)
> - evergreen_emit_atomic_buffer_setup(rctx, NULL,
> combined_atomics, &atomic_used_mask);
> + if (rctx->b.chip_class >= EVERGREEN) {
> + evergreen_emit_atomic_buffer_setup_count(rctx, NULL,
> combined_atomics, &atomic_used_mask);
> + }
>
> if (index_size) {
> index_offset += info->start * index_size;
> @@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
> evergreen_setup_tess_constants(rctx, info,
> &num_patches);
>
> /* Emit states. */
> - r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
> + r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE,
> util_bitcount(atomic_used_mask));
> r600_flush_emit(rctx);
>
> mask = rctx->dirty_atoms;
> @@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
> r600_emit_atom(rctx, rctx-
> >atoms[u_bit_scan64(&mask)]);
> }
>
> + if (rctx->b.chip_class >= EVERGREEN) {
> + evergreen_emit_atomic_buffer_setup(rctx, false,
> combined_atomics, atomic_used_mask);
> + }
> +
> if (rctx->b.chip_class == CAYMAN) {
> /* Copied from radeonsi. */
> unsigned primgroup_size = 128; /* recommended
> without a GS */
> @@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct
> pipe_context *ctx, boolean enable
> static void r600_need_gfx_cs_space(struct pipe_context *ctx,
> unsigned num_dw,
> bool include_draw_vbo)
> {
> - r600_need_cs_space((struct r600_context*)ctx, num_dw,
> include_draw_vbo);
> + r600_need_cs_space((struct r600_context*)ctx, num_dw,
> include_draw_vbo, 0);
> }
>
> /* keep this at the end of this file, please */
More information about the mesa-dev
mailing list