[Mesa-dev] [PATCH 2/2] r600g: simplify and fix flushing and synchronization v2

Marek Olšák maraeo at gmail.com
Thu Jul 19 15:07:27 PDT 2012


I have these issues with the patch:

1) On GPUs without a vertex cache, you flush the texture cache on every
draw operation. Are you kidding?

2) All colorbuffers / streamout buffers are flushed, even those which
are not enabled. E.g. instead of flushing only CB0 when there is only
one, this code flushes all of them. Why? This either needs an
explanation or it should only flush the buffers which are enabled
(like the old code did).

3) Please explain:
- why you added PS_PARTIAL_FLUSH in r600_texture_barrier and
r600_set_framebuffer_state.
- why you added CACHE_FLUSH_AND_INV_EVENT in set_framebuffer_state for
R700 and evergreen.
- why you applied the CB flush workarounds meant for RV6xx to all R600
and R700 chipsets.
- why the streamout workaround for RV6xx (S_0085F0_DEST_BASE_0_ENA) is
applied to all R600, R700, and evergreen chipsets.
- why R600_CONTEXT_FLUSH_AND_INV emits SURFACE_SYNC on evergreen,
resulting in emission of SURFACE_SYNC twice in a row in most
situations.

Flushing has always worked without all the changes (1, 2, 3) mentioned
above, so if you don't have a reasonable explanation, please revert to
the old behavior.

4) R600_CONTEXT_DRAW_FLUSH is a misleading name. Please rename it to
R600_CONTEXT_PS_PARTIAL_FLUSH. Also, FLUSH_AND_INV_DB_META should be
prefixed with EVENT_TYPE_.

Overall, the idea of simplifying flushing is good, but the result
looks worse than what is in master already and it generally adds more
work for the hardware. Also, the title is misleading. The patch
doesn't fix flushing in general, because there was nothing wrong with
it in the first place, except maybe for hyperz, which is almost
unrelated to what the patch tries to achieve.

Marek

On Tue, Jul 17, 2012 at 7:58 PM,  <j.glisse at gmail.com> wrote:
> From: Jerome Glisse <jglisse at redhat.com>
>
> Flushing and synchronization only need to happen at the beginning
> and end of a cs, and after each draw packet if necessary. This
> patch is especially needed for the hyperz/htile feature.
>
> v2: Separate evergreen and r6xx/r7xx flushing/syncing to allow
>     easier specialization of each function. Fix r6xx/r7xx
>     regression.
>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
> ---
>  src/gallium/drivers/r600/evergreen_compute.c       |   23 +--
>  .../drivers/r600/evergreen_compute_internal.c      |    4 +-
>  src/gallium/drivers/r600/evergreen_hw_context.c    |  110 ++++++++++-
>  src/gallium/drivers/r600/evergreen_state.c         |   14 +-
>  src/gallium/drivers/r600/evergreend.h              |    3 +-
>  src/gallium/drivers/r600/r600.h                    |   19 +-
>  src/gallium/drivers/r600/r600_buffer.c             |    2 +-
>  src/gallium/drivers/r600/r600_hw_context.c         |  203 ++++++++++++--------
>  src/gallium/drivers/r600/r600_hw_context_priv.h    |    3 +-
>  src/gallium/drivers/r600/r600_pipe.c               |    2 -
>  src/gallium/drivers/r600/r600_pipe.h               |    6 +-
>  src/gallium/drivers/r600/r600_state.c              |   23 +--
>  src/gallium/drivers/r600/r600_state_common.c       |   68 ++-----
>  13 files changed, 297 insertions(+), 183 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
> index 947a328..37c3395 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -96,7 +96,7 @@ static void evergreen_cs_set_vertex_buffer(
>         vb->buffer = buffer;
>         vb->user_buffer = NULL;
>
> -       r600_inval_vertex_cache(rctx);
> +       rctx->flags |= R600_CONTEXT_VTX_FLUSH;
>         state->dirty_mask |= 1 << vb_index;
>         r600_atom_dirty(rctx, &state->atom);
>  }
> @@ -208,8 +208,7 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state)
>         res->usage = RADEON_USAGE_READ;
>         res->coher_bo_size = ctx->cs_shader->bc.ndw*4;
>
> -       r600_inval_shader_cache(ctx);
> -
> +       ctx->flags |= R600_CONTEXT_SH_FLUSH;
>  }
>
>  /* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit
> @@ -364,8 +363,11 @@ static void compute_emit_cs(struct r600_context *ctx)
>          */
>         r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);
>
> +       ctx->flags |= R600_CONTEXT_CB_FLUSH;
> +       r600_flush_emit(ctx);
> +
>         /* Emit cb_state */
> -        cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
> +       cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
>         r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);
>
>         /* Emit vertex buffer state */
> @@ -405,15 +407,8 @@ static void compute_emit_cs(struct r600_context *ctx)
>                 }
>         }
>
> -       /* r600_flush_framebuffer() updates the cb_flush_flags and then
> -        * calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which emits
> -        * a SURFACE_SYNC packet via r600_emit_surface_sync().
> -        *
> -        * XXX r600_emit_surface_sync() hardcodes the CP_COHER_SIZE to
> -        * 0xffffffff, so we will need to add a field to struct
> -        * r600_surface_sync_cmd if we want to manually set this value.
> -        */
> -       r600_flush_framebuffer(ctx, true /* Flush now */);
> +       ctx->flags |= R600_CONTEXT_CB_FLUSH;
> +       r600_flush_emit(ctx);
>
>  #if 0
>         COMPUTE_DBG("cdw: %i\n", cs->cdw);
> @@ -460,6 +455,8 @@ static void evergreen_launch_grid(
>         evergreen_set_lds(ctx->cs_shader, 0, 0, num_waves);
>         evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
>         evergreen_direct_dispatch(ctx_, block_layout, grid_layout);
> +       /* set draw pending so flush function know we mean business */
> +       ctx->flags |= R600_CONTEXT_DRAW_PENDING;
>         compute_emit_cs(ctx);
>  }
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
> index 1d11bab..8bb6426 100644
> --- a/src/gallium/drivers/r600/evergreen_compute_internal.c
> +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
> @@ -559,7 +559,7 @@ void evergreen_set_tex_resource(
>
>         res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth;
>
> -       r600_inval_texture_cache(pipe->ctx);
> +       pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH;
>
>         evergreen_emit_force_reloc(res);
>         evergreen_emit_force_reloc(res);
> @@ -618,7 +618,7 @@ void evergreen_set_const_cache(
>         res->usage = RADEON_USAGE_READ;
>         res->coher_bo_size = size;
>
> -       r600_inval_shader_cache(pipe->ctx);
> +       pipe->ctx->flags |= R600_CONTEXT_SH_FLUSH;
>  }
>
>  struct r600_resource* r600_compute_buffer_alloc_vram(
> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
> index 546c884..cf4a225 100644
> --- a/src/gallium/drivers/r600/evergreen_hw_context.c
> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c
> @@ -761,8 +761,9 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c
>         /* We have to flush the shaders before we change the border color
>          * registers, or previous draw commands that haven't completed yet
>          * will end up using the new border color. */
> -       if (dirty & R600_BLOCK_STATUS_DIRTY)
> -               r600_context_ps_partial_flush(ctx);
> +       if (dirty & R600_BLOCK_STATUS_DIRTY) {
> +               ctx->flags |= R600_CONTEXT_DRAW_FLUSH;
> +       }
>         if (dirty)
>                 r600_context_dirty_block(ctx, block, dirty, 4);
>  }
> @@ -823,3 +824,108 @@ void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_en
>                 cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
>         }
>  }
> +
> +void evergreen_flush_emit(struct r600_context *rctx)
> +{
> +       struct radeon_winsys_cs *cs = rctx->cs;
> +       unsigned mask;
> +
> +       if (!(rctx->flags & R600_CONTEXT_DRAW_PENDING)) {
> +               return;
> +       }
> +       /* for GPU without vertex cache flush the texture cache */
> +       if (!rctx->has_vertex_cache) {
> +               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
> +       }
> +
> +       if (rctx->flags & R600_CONTEXT_DRAW_FLUSH) {
> +               rctx->flags &= ~R600_CONTEXT_DRAW_FLUSH;
> +               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +               cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
> +       }
> +
> +       if (rctx->flags & R600_CONTEXT_DB_FLUSH) {
> +               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +               cs->buf[cs->cdw++] = EVENT_TYPE(FLUSH_AND_INV_DB_META) | EVENT_INDEX(0);
> +       }
> +
> +       mask = R600_CONTEXT_CB_FLUSH |
> +              R600_CONTEXT_DB_FLUSH |
> +              R600_CONTEXT_SH_FLUSH |
> +              R600_CONTEXT_TEX_FLUSH |
> +              R600_CONTEXT_VTX_FLUSH |
> +              R600_CONTEXT_STREAMOUT_FLUSH;
> +       if (rctx->flags & mask) {
> +               /* anything left (cb, vtx, shader, streamout) can be flushed
> +                * in a more generic way
> +                */
> +               unsigned flags = 0;
> +
> +               flags |= (rctx->flags & R600_CONTEXT_CB_FLUSH) ? S_0085F0_CB_ACTION_ENA(1) |
> +                                                                S_0085F0_CB0_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB1_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB2_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB3_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB4_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB5_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB6_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB7_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB8_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB9_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB10_DEST_BASE_ENA(1) |
> +                                                                S_0085F0_CB11_DEST_BASE_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) |
> +                                                                S_0085F0_DB_DEST_BASE_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_SH_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) ? S_0085F0_DEST_BASE_0_ENA(1) |
> +                                                                       S_0085F0_SMX_ACTION_ENA(1) |
> +                                                                       S_0085F0_SO0_DEST_BASE_ENA(1) |
> +                                                                       S_0085F0_SO1_DEST_BASE_ENA(1) |
> +                                                                       S_0085F0_SO2_DEST_BASE_ENA(1) |
> +                                                                       S_0085F0_SO3_DEST_BASE_ENA(1) : 0;
> +               rctx->flags &= ~mask;
> +               cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
> +               cs->buf[cs->cdw++] = flags;           /* CP_COHER_CNTL */
> +               cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
> +               cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
> +               cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
> +       }
> +
> +       if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
> +               rctx->flags &= ~R600_CONTEXT_FLUSH_AND_INV;
> +
> +               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +               cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
> +               cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
> +               /* CP_COHER_CNTL */
> +               cs->buf[cs->cdw++] = S_0085F0_SO0_DEST_BASE_ENA(1) |
> +                                    S_0085F0_SO1_DEST_BASE_ENA(1) |
> +                                    S_0085F0_SO2_DEST_BASE_ENA(1) |
> +                                    S_0085F0_SO3_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB0_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB1_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB2_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB3_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB4_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB5_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB6_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB7_DEST_BASE_ENA(1) |
> +                                    S_0085F0_DB_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB8_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB9_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB10_DEST_BASE_ENA(1) |
> +                                    S_0085F0_CB11_DEST_BASE_ENA(1) |
> +                                    S_0085F0_TC_ACTION_ENA(1) |
> +                                    S_0085F0_VC_ACTION_ENA(1) |
> +                                    S_0085F0_SH_ACTION_ENA(1) |
> +                                    S_0085F0_SMX_ACTION_ENA(1);
> +               cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
> +               cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
> +               cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
> +       }
> +
> +       /* everything is properly flush */
> +       rctx->flags &= R600_CONTEXT_DRAW_PENDING;
> +}
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index 214d76b..8e3eb95 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1160,8 +1160,9 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count,
>         struct r600_context *rctx = (struct r600_context *)ctx;
>         struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
>
> -       if (count)
> -               r600_inval_texture_cache(rctx);
> +       if (count) {
> +               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
> +       }
>
>         memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count);
>         rctx->ps_samplers.n_samplers = count;
> @@ -1176,8 +1177,9 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count,
>         struct r600_context *rctx = (struct r600_context *)ctx;
>         struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
>
> -       if (count)
> -               r600_inval_texture_cache(rctx);
> +       if (count) {
> +               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
> +       }
>
>         for (int i = 0; i < count; i++) {
>                 evergreen_context_pipe_state_set_vs_sampler(rctx, rstates[i], i);
> @@ -1674,6 +1676,7 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta
>         if (rtex->hyperz) {
>                 uint64_t htile_offset = rtex->hyperz->surface.level[level].offset;
>
> +               rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
>                 rctx->db_misc_state.hyperz = true;
>                 rctx->db_misc_state.db_htile_surface_mask = 0xffffffff;
>                 r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
> @@ -1709,7 +1712,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>         if (rstate == NULL)
>                 return;
>
> -       r600_flush_framebuffer(rctx, false);
> +       rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_FLUSH_AND_INV;
>
>         /* unreference old buffer and reference new one */
>         rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
> @@ -1729,6 +1732,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>         }
>
>         if (state->zsbuf) {
> +               rctx->flags |= R600_CONTEXT_DB_FLUSH;
>                 evergreen_db(rctx, rstate, state);
>         }
>
> diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
> index 1ac5944..672e698 100644
> --- a/src/gallium/drivers/r600/evergreend.h
> +++ b/src/gallium/drivers/r600/evergreend.h
> @@ -46,7 +46,8 @@
>  #define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
>  #define EVENT_TYPE_ZPASS_DONE                  0x15
>  #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
> -#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH       0x1f
> +#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH       0x1f
> +#define FLUSH_AND_INV_DB_META                  0x2c
>
>  #define                EVENT_TYPE(x)                           ((x) << 0)
>  #define                EVENT_INDEX(x)                          ((x) << 8)
> diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
> index 0ae7959..820b356 100644
> --- a/src/gallium/drivers/r600/r600.h
> +++ b/src/gallium/drivers/r600/r600.h
> @@ -188,8 +188,17 @@ struct r600_so_target {
>         unsigned                so_index;
>  };
>
> -#define R600_CONTEXT_DRAW_PENDING      (1 << 0)
> -#define R600_CONTEXT_DST_CACHES_DIRTY  (1 << 1)
> +#define R600_CONTEXT_DRAW_PENDING              (1 << 0)
> +#define R600_CONTEXT_DRAW_FLUSH                        (1 << 1)
> +#define R600_CONTEXT_CB_FLUSH                  (1 << 2)
> +#define R600_CONTEXT_DB_FLUSH                  (1 << 3)
> +#define R600_CONTEXT_SH_FLUSH                  (1 << 4)
> +#define R600_CONTEXT_TEX_FLUSH                 (1 << 5)
> +#define R600_CONTEXT_VTX_FLUSH                 (1 << 6)
> +#define R600_CONTEXT_STREAMOUT_FLUSH           (1 << 7)
> +#define R600_CONTEXT_WAIT_IDLE                 (1 << 8)
> +#define R600_CONTEXT_FLUSH_AND_INV             (1 << 9)
> +#define R600_CONTEXT_HTILE_ERRATA              (1 << 10)
>
>  struct r600_context;
>  struct r600_screen;
> @@ -207,10 +216,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags);
>
>  void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence,
>                               unsigned offset, unsigned value);
> -void r600_inval_shader_cache(struct r600_context *ctx);
> -void r600_inval_texture_cache(struct r600_context *ctx);
> -void r600_inval_vertex_cache(struct r600_context *ctx);
> -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now);
> +void r600_flush_emit(struct r600_context *ctx);
>
>  void r600_context_streamout_begin(struct r600_context *ctx);
>  void r600_context_streamout_end(struct r600_context *ctx);
> @@ -222,6 +228,7 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60
>  int evergreen_context_init(struct r600_context *ctx);
>  void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
>  void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
> +void evergreen_flush_emit(struct r600_context *rctx);
>
>  void _r600_pipe_state_add_reg_bo(struct r600_context *ctx,
>                                  struct r600_pipe_state *state,
> diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
> index 8e2deb1..774b876 100644
> --- a/src/gallium/drivers/r600/r600_buffer.c
> +++ b/src/gallium/drivers/r600/r600_buffer.c
> @@ -110,7 +110,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
>                                         struct r600_vertexbuf_state * state =
>                                                 &rctx->vertex_buffer_state;
>                                         state->dirty_mask |= 1 << i;
> -                                       r600_inval_vertex_cache(rctx);
> +                                       rctx->flags |= R600_CONTEXT_VTX_FLUSH;
>                                         r600_atom_dirty(rctx, &state->atom);
>                                 }
>                         }
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index 4f2c03a..95c861a 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -114,19 +114,6 @@ err:
>         return;
>  }
>
> -void r600_context_ps_partial_flush(struct r600_context *ctx)
> -{
> -       struct radeon_winsys_cs *cs = ctx->cs;
> -
> -       if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
> -               return;
> -
> -       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> -       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
> -
> -       ctx->flags &= ~R600_CONTEXT_DRAW_PENDING;
> -}
> -
>  static void r600_init_block(struct r600_context *ctx,
>                             struct r600_block *block,
>                             const struct r600_reg *reg, int index, int nreg,
> @@ -825,7 +812,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>         }
>
>         /* Count in framebuffer cache flushes at the end of CS. */
> -       num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
> +       num_dw += 44; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
>
>         /* Save 16 dwords for the fence mechanism. */
>         num_dw += 16;
> @@ -853,7 +840,7 @@ void r600_context_dirty_block(struct r600_context *ctx,
>                 LIST_ADDTAIL(&block->list,&ctx->dirty);
>
>                 if (block->flags & REG_FLAG_FLUSH_CHANGE) {
> -                       r600_context_ps_partial_flush(ctx);
> +                       ctx->flags |= R600_CONTEXT_DRAW_FLUSH;
>                 }
>         }
>  }
> @@ -1085,8 +1072,9 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex
>         /* We have to flush the shaders before we change the border color
>          * registers, or previous draw commands that haven't completed yet
>          * will end up using the new border color. */
> -       if (dirty & R600_BLOCK_STATUS_DIRTY)
> -               r600_context_ps_partial_flush(ctx);
> +       if (dirty & R600_BLOCK_STATUS_DIRTY) {
> +               ctx->flags |= R600_CONTEXT_DRAW_FLUSH;
> +       }
>         if (dirty)
>                 r600_context_dirty_block(ctx, block, dirty, 3);
>  }
> @@ -1200,54 +1188,116 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60
>         LIST_DELINIT(&block->list);
>  }
>
> -void r600_inval_shader_cache(struct r600_context *ctx)
> +void r600_flush_emit(struct r600_context *rctx)
>  {
> -       ctx->surface_sync_cmd.flush_flags |= S_0085F0_SH_ACTION_ENA(1);
> -       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -}
> -
> -void r600_inval_texture_cache(struct r600_context *ctx)
> -{
> -       ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
> -       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -}
> -
> -void r600_inval_vertex_cache(struct r600_context *ctx)
> -{
> -       if (ctx->has_vertex_cache) {
> -               ctx->surface_sync_cmd.flush_flags |= S_0085F0_VC_ACTION_ENA(1);
> -       } else {
> -               /* Some GPUs don't have the vertex cache and must use the texture cache instead. */
> -               ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
> -       }
> -       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -}
> +       struct radeon_winsys_cs *cs = rctx->cs;
> +       unsigned mask;
>
> -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now)
> -{
> -       if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
> +       if (!(rctx->flags & R600_CONTEXT_DRAW_PENDING)) {
>                 return;
> +       }
> +       /* for GPU without vertex cache flush the texture cache */
> +       if (!rctx->has_vertex_cache) {
> +               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
> +       }
>
> -       ctx->surface_sync_cmd.flush_flags |=
> -               r600_get_cb_flush_flags(ctx) |
> -               (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0);
> +       if (rctx->flags & R600_CONTEXT_DRAW_FLUSH) {
> +               rctx->flags &= ~R600_CONTEXT_DRAW_FLUSH;
> +               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +               cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
> +       }
>
> -       if (flush_now) {
> -               r600_emit_atom(ctx, &ctx->surface_sync_cmd.atom);
> -       } else {
> -               r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> +       if (rctx->flags & R600_CONTEXT_WAIT_IDLE) {
> +               /* wait for things to settle */
> +               rctx->flags &= ~R600_CONTEXT_WAIT_IDLE;
> +               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
> +               cs->buf[cs->cdw++] = (R_008040_WAIT_UNTIL - R600_CONFIG_REG_OFFSET) >> 2;
> +               cs->buf[cs->cdw++] = S_008040_WAIT_3D_IDLE(1);
>         }
>
> -       /* Also add a complete cache flush to work around broken flushing on R6xx. */
> -       if (ctx->chip_class == R600) {
> -               if (flush_now) {
> -                       r600_emit_atom(ctx, &ctx->r6xx_flush_and_inv_cmd);
> -               } else {
> -                       r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd);
> +       if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
> +               rctx->flags &= ~R600_CONTEXT_FLUSH_AND_INV;
> +
> +               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +               cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
> +
> +               /* db flush are special due to errata with hyperz, we need to
> +                * insert noop so cache have time to really flush
> +                */
> +               if (rctx->flags & R600_CONTEXT_HTILE_ERRATA) {
> +                       /* R600_CONTEXT_HTILE_ERRATA is persistant for whole cs */
> +                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0);
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
>                 }
>         }
>
> -       ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
> +       mask = R600_CONTEXT_CB_FLUSH |
> +              R600_CONTEXT_DB_FLUSH |
> +              R600_CONTEXT_SH_FLUSH |
> +              R600_CONTEXT_TEX_FLUSH |
> +              R600_CONTEXT_VTX_FLUSH |
> +              R600_CONTEXT_STREAMOUT_FLUSH;
> +       if (rctx->flags & mask) {
> +               /* anything left (cb, vtx, shader, streamout) can be flushed
> +                * in a more generic way
> +                */
> +               unsigned flags = 0;
> +
> +               flags |= S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_DEST_BASE_0_ENA(1);
> +               flags |= (rctx->flags & R600_CONTEXT_CB_FLUSH) ? S_0085F0_CB_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_SH_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) ? S_0085F0_DEST_BASE_0_ENA(1) |
> +                                                                       S_0085F0_SO0_DEST_BASE_ENA(1) |
> +                                                                       S_0085F0_SO1_DEST_BASE_ENA(1) |
> +                                                                       S_0085F0_SO2_DEST_BASE_ENA(1) |
> +                                                                       S_0085F0_SO3_DEST_BASE_ENA(1) |
> +                                                                       S_0085F0_SMX_ACTION_ENA(1) : 0;
> +               rctx->flags &= ~mask;
> +               cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
> +               cs->buf[cs->cdw++] = flags;           /* CP_COHER_CNTL */
> +               cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
> +               cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
> +               cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
> +       }
> +
> +       /* everything is properly flush */
> +       rctx->flags &= R600_CONTEXT_DRAW_PENDING |
> +       /* R600_CONTEXT_HTILE_ERRATA is persistant for whole cs */
> +                      R600_CONTEXT_HTILE_ERRATA;
>  }
>
>  void r600_context_flush(struct r600_context *ctx, unsigned flags)
> @@ -1276,10 +1326,13 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
>                 streamout_suspended = true;
>         }
>
> -       r600_flush_framebuffer(ctx, true);
> -
> -       /* partial flush is needed to avoid lockups on some chips with user fences */
> -       r600_context_ps_partial_flush(ctx);
> +       ctx->flags |= R600_CONTEXT_DRAW_FLUSH;
> +       ctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
> +       if (ctx->chip_class >= EVERGREEN) {
> +               evergreen_flush_emit(ctx);
> +       } else {
> +               r600_flush_emit(ctx);
> +       }
>
>         /* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
>         if (ctx->chip_class <= R700) {
> @@ -1298,11 +1351,9 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
>         /* Begin a new CS. */
>         r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
>
> -       /* Invalidate caches. */
> -       r600_inval_vertex_cache(ctx);
> -       r600_inval_texture_cache(ctx);
> -       r600_inval_shader_cache(ctx);
> -       r600_flush_framebuffer(ctx, false);
> +       ctx->flags |= R600_CONTEXT_SH_FLUSH |
> +                     R600_CONTEXT_TEX_FLUSH |
> +                     R600_CONTEXT_VTX_FLUSH;
>
>         /* Re-emit states. */
>         r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
> @@ -1357,7 +1408,10 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
>         va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
>         va = va + (offset << 2);
>
> -       r600_context_ps_partial_flush(ctx);
> +       ctx->flags &= ~R600_CONTEXT_DRAW_FLUSH;
> +       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
> +
>         cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
>         cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
>         cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
> @@ -1518,7 +1572,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
>  {
>         struct radeon_winsys_cs *cs = ctx->cs;
>         struct r600_so_target **t = ctx->so_targets;
> -       unsigned i, flush_flags = 0;
> +       unsigned i;
>         uint64_t va;
>
>         if (ctx->chip_class >= EVERGREEN) {
> @@ -1545,7 +1599,6 @@ void r600_context_streamout_end(struct r600_context *ctx)
>                                 r600_context_bo_reloc(ctx,  t[i]->filled_size,
>                                                       RADEON_USAGE_WRITE);
>
> -                       flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
>                 }
>         }
>
> @@ -1554,23 +1607,11 @@ void r600_context_streamout_end(struct r600_context *ctx)
>         } else {
>                 r600_set_streamout_enable(ctx, 0);
>         }
> -
> +       ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
>         /* This is needed to fix cache flushes on r600. */
>         if (ctx->chip_class == R600) {
> -               if (ctx->family == CHIP_RV670 ||
> -                   ctx->family == CHIP_RS780 ||
> -                   ctx->family == CHIP_RS880) {
> -                       flush_flags |= S_0085F0_DEST_BASE_0_ENA(1);
> -               }
> -
> -               r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd);
> +               ctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
>         }
> -
> -       /* Flush streamout caches. */
> -       ctx->surface_sync_cmd.flush_flags |=
> -               S_0085F0_SMX_ACTION_ENA(1) | flush_flags;
> -       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -
>         ctx->num_cs_dw_streamout_end = 0;
>
>  #if 0
> diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
> index 037d5e3..6929336 100644
> --- a/src/gallium/drivers/r600/r600_hw_context_priv.h
> +++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
> @@ -28,7 +28,8 @@
>
>  #include "r600_pipe.h"
>
> -#define R600_MAX_DRAW_CS_DWORDS 11
> +/* we also account here for the size needed for flushing */
> +#define R600_MAX_DRAW_CS_DWORDS 64
>
>  /* these flags are used in register flags and added into block flags */
>  #define REG_FLAG_NEED_BO 1
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index 9f20560..07a398f 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -231,8 +231,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>         rctx->context.create_video_decoder = vl_create_decoder;
>         rctx->context.create_video_buffer = vl_video_buffer_create;
>
> -       r600_init_common_atoms(rctx);
> -
>         switch (rctx->chip_class) {
>         case R600:
>         case R700:
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 5ff4d51..4add90c 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -341,9 +341,7 @@ struct r600_context {
>         struct r600_command_buffer      start_cs_cmd; /* invariant state mostly */
>         /** Compute specific registers initializations.  The start_cs_cmd atom
>          *  must be emitted before start_compute_cs_cmd. */
> -        struct r600_command_buffer      start_compute_cs_cmd;
> -       struct r600_surface_sync_cmd    surface_sync_cmd;
> -       struct r600_atom                r6xx_flush_and_inv_cmd;
> +        struct r600_command_buffer     start_compute_cs_cmd;
>         struct r600_cb_misc_state       cb_misc_state;
>         struct r600_db_misc_state       db_misc_state;
>         /** Vertex buffers for fetch shaders */
> @@ -528,8 +526,6 @@ void r600_translate_index_buffer(struct r600_context *r600,
>  void r600_init_atom(struct r600_atom *atom,
>                     void (*emit)(struct r600_context *ctx, struct r600_atom *state),
>                     unsigned num_dw, enum r600_atom_flags flags);
> -void r600_init_common_atoms(struct r600_context *rctx);
> -unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
>  void r600_texture_barrier(struct pipe_context *ctx);
>  void r600_set_index_buffer(struct pipe_context *ctx,
>                            const struct pipe_index_buffer *ib);
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index 8925a23..3f43a9d 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -1133,8 +1133,9 @@ static void r600_set_sampler_views(struct r600_context *rctx,
>         struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views;
>         unsigned i;
>
> -       if (count)
> -               r600_inval_texture_cache(rctx);
> +       if (count) {
> +               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
> +       }
>
>         for (i = 0; i < count; i++) {
>                 if (rviews[i]) {
> @@ -1632,6 +1633,8 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate,
>         if (rtex->hyperz) {
>                 uint64_t htile_offset = rtex->hyperz->surface.level[level].offset;
>
> +               rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
> +               rctx->flags |= R600_CONTEXT_HTILE_ERRATA;
>                 rctx->db_misc_state.hyperz = true;
>                 rctx->db_misc_state.db_htile_surface_mask = 0xffffffff;
>                 r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
> @@ -1676,7 +1679,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>         if (rstate == NULL)
>                 return;
>
> -       r600_flush_framebuffer(rctx, false);
> +       /* the htile errata is also needed for cb; it should probably be renamed */
> +       rctx->flags |= R600_CONTEXT_CB_FLUSH |
> +                      R600_CONTEXT_DRAW_FLUSH |
> +                      R600_CONTEXT_FLUSH_AND_INV;
>
>         /* unreference old buffer and reference new one */
>         rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
> @@ -1692,6 +1698,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>                 r600_cb(rctx, rstate, state, i);
>         }
>         if (state->zsbuf) {
> +               rctx->flags |= R600_CONTEXT_DB_FLUSH;
>                 r600_db(rctx, rstate, state);
>         }
>
> @@ -1962,14 +1969,8 @@ void r600_adjust_gprs(struct r600_context *rctx)
>         unsigned tmp;
>         int diff;
>
> -       /* XXX: Following call moved from r600_bind_[ps|vs]_shader,
> -        * it seems eg+ doesn't need it, r6xx/7xx probably need it only for
> -        * adjusting the GPR allocation?
> -        * Do we need this if we aren't really changing config below? */
> -       r600_inval_shader_cache(rctx);
> -
> -       if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs)
> -       {
> +       rctx->flags |= R600_CONTEXT_SH_FLUSH;
> +       if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) {
>                 diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs;
>                 num_vs_gprs -= diff;
>                 num_ps_gprs += diff;
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index 3c42a44..c9fd362 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -56,27 +56,6 @@ void r600_release_command_buffer(struct r600_command_buffer *cb)
>         FREE(cb->buf);
>  }
>
> -static void r600_emit_surface_sync(struct r600_context *rctx, struct r600_atom *atom)
> -{
> -       struct radeon_winsys_cs *cs = rctx->cs;
> -       struct r600_surface_sync_cmd *a = (struct r600_surface_sync_cmd*)atom;
> -
> -       cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
> -       cs->buf[cs->cdw++] = a->flush_flags;  /* CP_COHER_CNTL */
> -       cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
> -       cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
> -       cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
> -
> -       a->flush_flags = 0;
> -}
> -
> -static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct r600_atom *atom)
> -{
> -       struct radeon_winsys_cs *cs = rctx->cs;
> -       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> -       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
> -}
> -
>  void r600_init_atom(struct r600_atom *atom,
>                     void (*emit)(struct r600_context *ctx, struct r600_atom *state),
>                     unsigned num_dw, enum r600_atom_flags flags)
> @@ -86,37 +65,11 @@ void r600_init_atom(struct r600_atom *atom,
>         atom->flags = flags;
>  }
>
> -void r600_init_common_atoms(struct r600_context *rctx)
> -{
> -       r600_init_atom(&rctx->surface_sync_cmd.atom,    r600_emit_surface_sync,         5, EMIT_EARLY);
> -       r600_init_atom(&rctx->r6xx_flush_and_inv_cmd,   r600_emit_r6xx_flush_and_inv,   2, EMIT_EARLY);
> -}
> -
> -unsigned r600_get_cb_flush_flags(struct r600_context *rctx)
> -{
> -       unsigned flags = 0;
> -
> -       if (rctx->framebuffer.nr_cbufs) {
> -               flags |= S_0085F0_CB_ACTION_ENA(1) |
> -                        (((1 << rctx->framebuffer.nr_cbufs) - 1) << S_0085F0_CB0_DEST_BASE_ENA_SHIFT);
> -       }
> -
> -       /* Workaround for broken flushing on some R6xx chipsets. */
> -       if (rctx->family == CHIP_RV670 ||
> -           rctx->family == CHIP_RS780 ||
> -           rctx->family == CHIP_RS880) {
> -               flags |=  S_0085F0_CB1_DEST_BASE_ENA(1) |
> -                         S_0085F0_DEST_BASE_0_ENA(1);
> -       }
> -       return flags;
> -}
> -
>  void r600_texture_barrier(struct pipe_context *ctx)
>  {
>         struct r600_context *rctx = (struct r600_context *)ctx;
>
> -       rctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1) | r600_get_cb_flush_flags(rctx);
> -       r600_atom_dirty(rctx, &rctx->surface_sync_cmd.atom);
> +       rctx->flags |= R600_CONTEXT_DRAW_FLUSH | R600_CONTEXT_CB_FLUSH | R600_CONTEXT_TEX_FLUSH;
>  }
>
>  static bool r600_conv_pipe_prim(unsigned pprim, unsigned *prim)
> @@ -368,7 +321,7 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
>
>         rctx->vertex_elements = v;
>         if (v) {
> -               r600_inval_shader_cache(rctx);
> +               rctx->flags |= R600_CONTEXT_SH_FLUSH;
>
>                 rctx->states[v->rstate.id] = &v->rstate;
>                 r600_context_pipe_state_set(rctx, &v->rstate);
> @@ -412,9 +365,9 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
>
>         util_copy_vertex_buffers(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, count);
>
> -       r600_inval_vertex_cache(rctx);
> +       rctx->flags |= R600_CONTEXT_VTX_FLUSH;
>         state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
> -                                          rctx->nr_vertex_buffers;
> +                            rctx->nr_vertex_buffers;
>         for (i = 0 ; i < rctx->nr_vertex_buffers; i++) {
>                 state->dirty_mask |= 1 << i;
>         }
> @@ -523,6 +476,9 @@ static int r600_shader_select(struct pipe_context *ctx,
>         if (dirty)
>                 *dirty = 1;
>
> +       /* we are changing the shader, so a flush is needed */
> +       rctx->flags |= R600_CONTEXT_SH_FLUSH;
> +
>         shader->next_variant = sel->current;
>         sel->current = shader;
>
> @@ -667,7 +623,7 @@ static void r600_update_alpha_ref(struct r600_context *rctx)
>
>  void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
>  {
> -       r600_inval_shader_cache(rctx);
> +       rctx->flags |= R600_CONTEXT_SH_FLUSH;
>         state->atom.num_dw = rctx->chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
>                                                            : util_bitcount(state->dirty_mask)*19;
>         r600_atom_dirty(rctx, &state->atom);
> @@ -893,6 +849,12 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
>
>         r600_update_derived_state(rctx);
>
> +       if (rctx->chip_class >= EVERGREEN) {
> +               evergreen_flush_emit(rctx);
> +       } else {
> +               r600_flush_emit(rctx);
> +       }
> +
>         if (info.indexed) {
>                 /* Initialize the index buffer struct. */
>                 pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
> @@ -999,7 +961,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
>                                         (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
>         }
>
> -       rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING;
> +       rctx->flags |= R600_CONTEXT_DRAW_PENDING;
>
>         /* Set the depth buffer as dirty. */
>         if (rctx->framebuffer.zsbuf) {
> --
> 1.7.10.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list