[Mesa-dev] [PATCH 1/2] r600g: rework flushing and synchronization pattern v6
Marek Olšák
maraeo at gmail.com
Mon Dec 17 13:40:45 PST 2012
On Mon, Dec 17, 2012 at 8:28 PM, <j.glisse at gmail.com> wrote:
> From: Jerome Glisse <jglisse at redhat.com>
>
> This brings r600g almost in line with the closed source driver when
> it comes to flushing and synchronization pattern.
>
> v2-v4: history lost somewhere in outer space
> v5: Fix compute size of flushing, use define for flags, update
> worst case cs size requirement for flush, treat rs780 and
> newer as r7xx when it comes to streamout.
> v6: Fix num dw computation for framebuffer state, remove dead
> code, use define instead of hardcoded value.
>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
> ---
> src/gallium/drivers/r600/evergreen_compute.c | 8 +-
> .../drivers/r600/evergreen_compute_internal.c | 4 +-
> src/gallium/drivers/r600/evergreen_state.c | 4 +-
> src/gallium/drivers/r600/r600.h | 16 +-
> src/gallium/drivers/r600/r600_hw_context.c | 179 +++++++--------------
> src/gallium/drivers/r600/r600_hw_context_priv.h | 2 +-
> src/gallium/drivers/r600/r600_state.c | 20 ++-
> src/gallium/drivers/r600/r600_state_common.c | 19 +--
> 8 files changed, 90 insertions(+), 162 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
> index 66b0cc6..ea75d80 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer(
>
> /* The vertex instructions in the compute shaders use the texture cache,
> * so we need to invalidate it. */
> - rctx->flags |= R600_CONTEXT_TEX_FLUSH;
> + rctx->flags |= R600_CONTEXT_GPU_FLUSH;
> state->enabled_mask |= 1 << vb_index;
> state->dirty_mask |= 1 << vb_index;
> state->atom.dirty = true;
> @@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
> */
> r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd);
>
> - ctx->flags |= R600_CONTEXT_CB_FLUSH;
> + ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
> r600_flush_emit(ctx);
>
> /* Emit colorbuffers. */
> @@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>
> /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
> */
> - ctx->flags |= R600_CONTEXT_CB_FLUSH;
> + ctx->flags |= R600_CONTEXT_GPU_FLUSH;
> r600_flush_emit(ctx);
>
> #if 0
> @@ -468,7 +468,7 @@ void evergreen_emit_cs_shader(
> r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo,
> RADEON_USAGE_READ));
>
> - rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
> + rctx->flags |= R600_CONTEXT_GPU_FLUSH;
> }
>
> static void evergreen_launch_grid(
> diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
> index f7aebf2..94f556f 100644
> --- a/src/gallium/drivers/r600/evergreen_compute_internal.c
> +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
> @@ -545,7 +545,7 @@ void evergreen_set_tex_resource(
> util_format_get_blockwidth(tmp->resource.b.b.format) *
> view->base.texture->width0*height*depth;
>
> - pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH;
> + pipe->ctx->flags |= R600_CONTEXT_GPU_FLUSH;
>
> evergreen_emit_force_reloc(res);
> evergreen_emit_force_reloc(res);
> @@ -604,7 +604,7 @@ void evergreen_set_const_cache(
> res->usage = RADEON_USAGE_READ;
> res->coher_bo_size = size;
>
> - pipe->ctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
> + pipe->ctx->flags |= R600_CONTEXT_GPU_FLUSH;
> }
>
> struct r600_resource* r600_compute_buffer_alloc_vram(
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index 996c1b4..58964c4 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1557,14 +1557,14 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
> uint32_t i, log_samples;
>
> if (rctx->framebuffer.state.nr_cbufs) {
> - rctx->flags |= R600_CONTEXT_CB_FLUSH;
> + rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
>
> if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
> rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
> }
> }
> if (rctx->framebuffer.state.zsbuf) {
> - rctx->flags |= R600_CONTEXT_DB_FLUSH;
> + rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
> }
>
> util_copy_framebuffer_state(&rctx->framebuffer.state, state);
> diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
> index d15cd52..c351982 100644
> --- a/src/gallium/drivers/r600/r600.h
> +++ b/src/gallium/drivers/r600/r600.h
> @@ -182,17 +182,11 @@ struct r600_so_target {
> unsigned so_index;
> };
>
> -#define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 0)
> -#define R600_CONTEXT_CB_FLUSH (1 << 1)
> -#define R600_CONTEXT_DB_FLUSH (1 << 2)
> -#define R600_CONTEXT_SHADERCONST_FLUSH (1 << 3)
> -#define R600_CONTEXT_TEX_FLUSH (1 << 4)
> -#define R600_CONTEXT_VTX_FLUSH (1 << 5)
> -#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 6)
> -#define R600_CONTEXT_WAIT_IDLE (1 << 7)
> -#define R600_CONTEXT_FLUSH_AND_INV (1 << 8)
> -#define R600_CONTEXT_HTILE_ERRATA (1 << 9)
> -#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 10)
> +#define R600_CONTEXT_GPU_FLUSH (1 << 0)
> +#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 1)
> +#define R600_CONTEXT_WAIT_IDLE (1 << 2)
> +#define R600_CONTEXT_FLUSH_AND_INV (1 << 3)
> +#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 4)
>
> struct r600_context;
> struct r600_screen;
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index c7a357e..bce7cc8 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -424,7 +424,7 @@ void r600_context_dirty_block(struct r600_context *ctx,
> LIST_ADDTAIL(&block->list,&ctx->dirty);
>
> if (block->flags & REG_FLAG_FLUSH_CHANGE) {
> - ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
> + ctx->flags |= R600_CONTEXT_WAIT_IDLE;
> }
> }
> }
> @@ -595,16 +595,13 @@ out:
> void r600_flush_emit(struct r600_context *rctx)
> {
> struct radeon_winsys_cs *cs = rctx->cs;
> + unsigned cp_coher_cntl = 0;
> + unsigned emit_flush = 0;
>
> if (!rctx->flags) {
> return;
> }
>
> - if (rctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
> - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
> - }
> -
> if (rctx->chip_class >= R700 &&
> (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) {
> cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> @@ -614,110 +611,55 @@ void r600_flush_emit(struct r600_context *rctx)
> if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
> cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
> -
> - /* DB flushes are special due to errata with hyperz, we need to
> - * insert a no-op, so that the cache has time to really flush.
> - */
> - if (rctx->chip_class <= R700 &&
> - rctx->flags & R600_CONTEXT_HTILE_ERRATA) {
> - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0);
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> - cs->buf[cs->cdw++] = 0xdeadcafe;
> + if (rctx->chip_class >= EVERGREEN) {
> + cp_coher_cntl = S_0085F0_CB0_DEST_BASE_ENA(1) |
> + S_0085F0_CB1_DEST_BASE_ENA(1) |
> + S_0085F0_CB2_DEST_BASE_ENA(1) |
> + S_0085F0_CB3_DEST_BASE_ENA(1) |
> + S_0085F0_CB4_DEST_BASE_ENA(1) |
> + S_0085F0_CB5_DEST_BASE_ENA(1) |
> + S_0085F0_CB6_DEST_BASE_ENA(1) |
> + S_0085F0_CB7_DEST_BASE_ENA(1) |
> + S_0085F0_CB8_DEST_BASE_ENA(1) |
> + S_0085F0_CB9_DEST_BASE_ENA(1) |
> + S_0085F0_CB10_DEST_BASE_ENA(1) |
> + S_0085F0_CB11_DEST_BASE_ENA(1) |
> + S_0085F0_DB_DEST_BASE_ENA(1) |
> + S_0085F0_TC_ACTION_ENA(1) |
> + S_0085F0_CB_ACTION_ENA(1) |
> + S_0085F0_DB_ACTION_ENA(1) |
> + S_0085F0_SH_ACTION_ENA(1) |
> + S_0085F0_SMX_ACTION_ENA(1) |
> + (1 << 20); /* unknown bit */
> + } else {
> + cp_coher_cntl = S_0085F0_SMX_ACTION_ENA(1) |
> + S_0085F0_SH_ACTION_ENA(1) |
> + S_0085F0_VC_ACTION_ENA(1) |
> + S_0085F0_TC_ACTION_ENA(1) |
> + (1 << 20); /* unknown bit */
> }
> }
>
> - if (rctx->flags & (R600_CONTEXT_CB_FLUSH |
> - R600_CONTEXT_DB_FLUSH |
> - R600_CONTEXT_SHADERCONST_FLUSH |
> - R600_CONTEXT_TEX_FLUSH |
> - R600_CONTEXT_VTX_FLUSH |
> - R600_CONTEXT_STREAMOUT_FLUSH)) {
> - /* anything left (cb, vtx, shader, streamout) can be flushed
> - * using the surface sync packet
> - */
> - unsigned flags = 0;
> -
> - if (rctx->flags & R600_CONTEXT_CB_FLUSH) {
> - flags |= S_0085F0_CB_ACTION_ENA(1) |
> - S_0085F0_CB0_DEST_BASE_ENA(1) |
> - S_0085F0_CB1_DEST_BASE_ENA(1) |
> - S_0085F0_CB2_DEST_BASE_ENA(1) |
> - S_0085F0_CB3_DEST_BASE_ENA(1) |
> - S_0085F0_CB4_DEST_BASE_ENA(1) |
> - S_0085F0_CB5_DEST_BASE_ENA(1) |
> - S_0085F0_CB6_DEST_BASE_ENA(1) |
> - S_0085F0_CB7_DEST_BASE_ENA(1);
> -
> - if (rctx->chip_class >= EVERGREEN) {
> - flags |= S_0085F0_CB8_DEST_BASE_ENA(1) |
> - S_0085F0_CB9_DEST_BASE_ENA(1) |
> - S_0085F0_CB10_DEST_BASE_ENA(1) |
> - S_0085F0_CB11_DEST_BASE_ENA(1);
> - }
> -
> - /* RV670 errata
> - * (CB1_DEST_BASE_ENA is also required, which is
> - * included unconditionally above). */
> - if (rctx->family == CHIP_RV670 ||
> - rctx->family == CHIP_RS780 ||
> - rctx->family == CHIP_RS880) {
> - flags |= S_0085F0_DEST_BASE_0_ENA(1);
> - }
> - }
> -
> - if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
> - flags |= S_0085F0_SO0_DEST_BASE_ENA(1) |
> - S_0085F0_SO1_DEST_BASE_ENA(1) |
> - S_0085F0_SO2_DEST_BASE_ENA(1) |
> - S_0085F0_SO3_DEST_BASE_ENA(1) |
> - S_0085F0_SMX_ACTION_ENA(1);
> -
> - /* RV670 errata */
> - if (rctx->family == CHIP_RV670 ||
> - rctx->family == CHIP_RS780 ||
> - rctx->family == CHIP_RS880) {
> - flags |= S_0085F0_DEST_BASE_0_ENA(1);
> - }
> - }
> + if (rctx->flags & R600_CONTEXT_GPU_FLUSH) {
> + cp_coher_cntl |= 0x01900000;
You still have a magic number here. I had already told you about it.
We don't allow magic numbers like that in the code.
Other than that, the patch looks good.
Marek
More information about the mesa-dev mailing list