[Mesa-dev] [PATCH] r600g: simplify flushing

Jerome Glisse j.glisse at gmail.com
Mon Sep 10 12:47:53 PDT 2012


On Sun, Sep 9, 2012 at 1:03 AM, Marek Olšák <maraeo at gmail.com> wrote:
> Based on the patch called "simplify and fix flushing and synchronization"
> by Jerome Glisse.
>
> Rebased, removed unneeded code, simplified more and cleaned up.
>
> Also, SH_ACTION_ENA is not set when changing shaders (hw doesn't seem
> to need it). It's only used to flush constant buffers.

Looks good. I would still like to do some stress testing; I will try to do
that today.
Reviewed-by: Jerome Glisse <jglisse at redhat.com>

> ---
>  src/gallium/drivers/r600/evergreen_compute.c       |   20 +-
>  .../drivers/r600/evergreen_compute_internal.c      |    4 +-
>  src/gallium/drivers/r600/evergreen_state.c         |    7 +-
>  src/gallium/drivers/r600/evergreend.h              |    7 +-
>  src/gallium/drivers/r600/r600.h                    |   18 +-
>  src/gallium/drivers/r600/r600_hw_context.c         |  218 +++++++++++++-------
>  src/gallium/drivers/r600/r600_hw_context_priv.h    |    3 +-
>  src/gallium/drivers/r600/r600_pipe.c               |    2 -
>  src/gallium/drivers/r600/r600_pipe.h               |    4 -
>  src/gallium/drivers/r600/r600_state.c              |   21 +-
>  src/gallium/drivers/r600/r600_state_common.c       |   76 ++-----
>  src/gallium/drivers/r600/r600d.h                   |   12 ++
>  12 files changed, 210 insertions(+), 182 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
> index 3533312..1fb63d6 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -96,7 +96,7 @@ static void evergreen_cs_set_vertex_buffer(
>         vb->buffer = buffer;
>         vb->user_buffer = NULL;
>
> -       r600_inval_vertex_cache(rctx);
> +       rctx->flags |= rctx->has_vertex_cache ? R600_CONTEXT_VTX_FLUSH : R600_CONTEXT_TEX_FLUSH;
>         state->enabled_mask |= 1 << vb_index;
>         state->dirty_mask |= 1 << vb_index;
>         r600_atom_dirty(rctx, &state->atom);
> @@ -332,8 +332,11 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>          */
>         r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);
>
> +       ctx->flags |= R600_CONTEXT_CB_FLUSH;
> +       r600_flush_emit(ctx);
> +
>         /* Emit cb_state */
> -        cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
> +       cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
>         r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);
>
>         /* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
> @@ -384,15 +387,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
>         /* Emit dispatch state and dispatch packet */
>         evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);
>
> -       /* r600_flush_framebuffer() updates the cb_flush_flags and then
> -        * calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which emits
> -        * a SURFACE_SYNC packet via r600_emit_surface_sync().
> -        *
> -        * XXX r600_emit_surface_sync() hardcodes the CP_COHER_SIZE to
> -        * 0xffffffff, so we will need to add a field to struct
> -        * r600_surface_sync_cmd if we want to manually set this value.
> +       /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
>          */
> -       r600_flush_framebuffer(ctx, true /* Flush now */);
> +       ctx->flags |= R600_CONTEXT_CB_FLUSH;
> +       r600_flush_emit(ctx);
>
>  #if 0
>         COMPUTE_DBG("cdw: %i\n", cs->cdw);
> @@ -444,7 +442,7 @@ void evergreen_emit_cs_shader(
>         r600_write_value(cs, r600_context_bo_reloc(rctx, shader->shader_code_bo,
>                                                         RADEON_USAGE_READ));
>
> -       r600_inval_shader_cache(rctx);
> +       rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
>  }
>
>  static void evergreen_launch_grid(
> diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
> index 50a60d3..dc95732 100644
> --- a/src/gallium/drivers/r600/evergreen_compute_internal.c
> +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
> @@ -562,7 +562,7 @@ void evergreen_set_tex_resource(
>                              util_format_get_blockwidth(tmp->resource.b.b.format) *
>                              view->base.texture->width0*height*depth;
>
> -       r600_inval_texture_cache(pipe->ctx);
> +       pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH;
>
>         evergreen_emit_force_reloc(res);
>         evergreen_emit_force_reloc(res);
> @@ -621,7 +621,7 @@ void evergreen_set_const_cache(
>         res->usage = RADEON_USAGE_READ;
>         res->coher_bo_size = size;
>
> -       r600_inval_shader_cache(pipe->ctx);
> +       pipe->ctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
>  }
>
>  struct r600_resource* r600_compute_buffer_alloc_vram(
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index 9a5183e..2a7a35f 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1697,7 +1697,12 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>         if (rstate == NULL)
>                 return;
>
> -       r600_flush_framebuffer(rctx, false);
> +       if (rctx->framebuffer.nr_cbufs) {
> +               rctx->flags |= R600_CONTEXT_CB_FLUSH;
> +       }
> +       if (rctx->framebuffer.zsbuf) {
> +               rctx->flags |= R600_CONTEXT_DB_FLUSH;
> +       }
>
>         /* unreference old buffer and reference new one */
>         rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
> diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
> index e4d72f5..18e1eb7 100644
> --- a/src/gallium/drivers/r600/evergreend.h
> +++ b/src/gallium/drivers/r600/evergreend.h
> @@ -46,7 +46,8 @@
>  #define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
>  #define EVENT_TYPE_ZPASS_DONE                  0x15
>  #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
> -#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH       0x1f
> +#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH       0x1f
> +#define EVENT_TYPE_FLUSH_AND_INV_DB_META       0x2c
>
>  #define                EVENT_TYPE(x)                           ((x) << 0)
>  #define                EVENT_INDEX(x)                          ((x) << 8)
> @@ -2186,16 +2187,12 @@
>  #define   C_0085F0_DB_DEST_BASE_ENA                    0xFFFFBFFF
>  #define   S_0085F0_CB8_DEST_BASE_ENA(x)                (((x) & 0x1) << 15)
>  #define   G_0085F0_CB8_DEST_BASE_ENA(x)                (((x) >> 15) & 0x1)
> -
>  #define   S_0085F0_CB9_DEST_BASE_ENA(x)                (((x) & 0x1) << 16)
>  #define   G_0085F0_CB9_DEST_BASE_ENA(x)                (((x) >> 16) & 0x1)
> -
>  #define   S_0085F0_CB10_DEST_BASE_ENA(x)               (((x) & 0x1) << 17)
>  #define   G_0085F0_CB10_DEST_BASE_ENA(x)               (((x) >> 17) & 0x1)
> -
>  #define   S_0085F0_CB11_DEST_BASE_ENA(x)               (((x) & 0x1) << 18)
>  #define   G_0085F0_CB11_DEST_BASE_ENA(x)               (((x) >> 18) & 0x1)
> -
>  #define   S_0085F0_TC_ACTION_ENA(x)                    (((x) & 0x1) << 23)
>  #define   G_0085F0_TC_ACTION_ENA(x)                    (((x) >> 23) & 0x1)
>  #define   C_0085F0_TC_ACTION_ENA                       0xFF7FFFFF
> diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
> index 1c8bd24..b6425cd 100644
> --- a/src/gallium/drivers/r600/r600.h
> +++ b/src/gallium/drivers/r600/r600.h
> @@ -180,9 +180,16 @@ struct r600_so_target {
>         unsigned                so_index;
>  };
>
> -#define R600_CONTEXT_DRAW_PENDING      (1 << 0)
> -#define R600_CONTEXT_DST_CACHES_DIRTY  (1 << 1)
> -#define R600_PARTIAL_FLUSH             (1 << 2)
> +#define R600_CONTEXT_PS_PARTIAL_FLUSH          (1 << 0)
> +#define R600_CONTEXT_CB_FLUSH                  (1 << 1)
> +#define R600_CONTEXT_DB_FLUSH                  (1 << 2)
> +#define R600_CONTEXT_SHADERCONST_FLUSH         (1 << 3)
> +#define R600_CONTEXT_TEX_FLUSH                 (1 << 4)
> +#define R600_CONTEXT_VTX_FLUSH                 (1 << 5)
> +#define R600_CONTEXT_STREAMOUT_FLUSH           (1 << 6)
> +#define R600_CONTEXT_WAIT_IDLE                 (1 << 7)
> +#define R600_CONTEXT_FLUSH_AND_INV             (1 << 8)
> +#define R600_CONTEXT_HTILE_ERRATA              (1 << 9)
>
>  struct r600_context;
>  struct r600_screen;
> @@ -196,10 +203,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags);
>
>  void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence,
>                               unsigned offset, unsigned value);
> -void r600_inval_shader_cache(struct r600_context *ctx);
> -void r600_inval_texture_cache(struct r600_context *ctx);
> -void r600_inval_vertex_cache(struct r600_context *ctx);
> -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now);
> +void r600_flush_emit(struct r600_context *ctx);
>
>  void r600_context_streamout_begin(struct r600_context *ctx);
>  void r600_context_streamout_end(struct r600_context *ctx);
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index 0ec13e5..d40f6b6 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -114,19 +114,6 @@ err:
>         return;
>  }
>
> -void r600_context_ps_partial_flush(struct r600_context *ctx)
> -{
> -       struct radeon_winsys_cs *cs = ctx->cs;
> -
> -       if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
> -               return;
> -
> -       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> -       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
> -
> -       ctx->flags &= ~R600_CONTEXT_DRAW_PENDING;
> -}
> -
>  static void r600_init_block(struct r600_context *ctx,
>                             struct r600_block *block,
>                             const struct r600_reg *reg, int index, int nreg,
> @@ -665,7 +652,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>         }
>
>         /* Count in framebuffer cache flushes at the end of CS. */
> -       num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
> +       num_dw += 44; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
>
>         /* Save 16 dwords for the fence mechanism. */
>         num_dw += 16;
> @@ -693,7 +680,7 @@ void r600_context_dirty_block(struct r600_context *ctx,
>                 LIST_ADDTAIL(&block->list,&ctx->dirty);
>
>                 if (block->flags & REG_FLAG_FLUSH_CHANGE) {
> -                       r600_context_ps_partial_flush(ctx);
> +                       ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
>                 }
>         }
>  }
> @@ -861,54 +848,138 @@ out:
>         LIST_DELINIT(&block->list);
>  }
>
> -void r600_inval_shader_cache(struct r600_context *ctx)
> +void r600_flush_emit(struct r600_context *rctx)
>  {
> -       ctx->surface_sync_cmd.flush_flags |= S_0085F0_SH_ACTION_ENA(1);
> -       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -}
> +       struct radeon_winsys_cs *cs = rctx->cs;
>
> -void r600_inval_texture_cache(struct r600_context *ctx)
> -{
> -       ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
> -       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -}
> +       if (!rctx->flags) {
> +               return;
> +       }
>
> -void r600_inval_vertex_cache(struct r600_context *ctx)
> -{
> -       if (ctx->has_vertex_cache) {
> -               ctx->surface_sync_cmd.flush_flags |= S_0085F0_VC_ACTION_ENA(1);
> -       } else {
> -               /* Some GPUs don't have the vertex cache and must use the texture cache instead. */
> -               ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
> +       if (rctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
> +               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +               cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
>         }
> -       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -}
>
> -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now)
> -{
> -       if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
> -               return;
> +       if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
> +               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +               cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
> +
> +               /* DB flushes are special due to errata with hyperz, we need to
> +                * insert a no-op, so that the cache has time to really flush.
> +                */
> +               if (rctx->chip_class <= R700 &&
> +                   rctx->flags & R600_CONTEXT_HTILE_ERRATA) {
> +                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0);
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +                       cs->buf[cs->cdw++] = 0xdeadcafe;
> +               }
> +       }
>
> -       ctx->surface_sync_cmd.flush_flags |=
> -               r600_get_cb_flush_flags(ctx) |
> -               (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0);
> +       if (rctx->flags & (R600_CONTEXT_CB_FLUSH |
> +                          R600_CONTEXT_DB_FLUSH |
> +                          R600_CONTEXT_SHADERCONST_FLUSH |
> +                          R600_CONTEXT_TEX_FLUSH |
> +                          R600_CONTEXT_VTX_FLUSH |
> +                          R600_CONTEXT_STREAMOUT_FLUSH)) {
> +               /* anything left (cb, vtx, shader, streamout) can be flushed
> +                * using the surface sync packet
> +                */
> +               unsigned flags = 0;
> +
> +               if (rctx->flags & R600_CONTEXT_CB_FLUSH) {
> +                       flags |= S_0085F0_CB_ACTION_ENA(1) |
> +                                S_0085F0_CB0_DEST_BASE_ENA(1) |
> +                                S_0085F0_CB1_DEST_BASE_ENA(1) |
> +                                S_0085F0_CB2_DEST_BASE_ENA(1) |
> +                                S_0085F0_CB3_DEST_BASE_ENA(1) |
> +                                S_0085F0_CB4_DEST_BASE_ENA(1) |
> +                                S_0085F0_CB5_DEST_BASE_ENA(1) |
> +                                S_0085F0_CB6_DEST_BASE_ENA(1) |
> +                                S_0085F0_CB7_DEST_BASE_ENA(1);
> +
> +                       if (rctx->chip_class >= EVERGREEN) {
> +                               flags |= S_0085F0_CB8_DEST_BASE_ENA(1) |
> +                                        S_0085F0_CB9_DEST_BASE_ENA(1) |
> +                                        S_0085F0_CB10_DEST_BASE_ENA(1) |
> +                                        S_0085F0_CB11_DEST_BASE_ENA(1);
> +                       }
>
> -       if (flush_now) {
> -               r600_emit_atom(ctx, &ctx->surface_sync_cmd.atom);
> -       } else {
> -               r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -       }
> +                       /* RV670 errata
> +                        * (CB1_DEST_BASE_ENA is also required, which is
> +                        * included unconditionally above). */
> +                       if (rctx->family == CHIP_RV670 ||
> +                           rctx->family == CHIP_RS780 ||
> +                           rctx->family == CHIP_RS880) {
> +                               flags |= S_0085F0_DEST_BASE_0_ENA(1);
> +                       }
> +               }
>
> -       /* Also add a complete cache flush to work around broken flushing on R6xx. */
> -       if (ctx->chip_class == R600) {
> -               if (flush_now) {
> -                       r600_emit_atom(ctx, &ctx->r6xx_flush_and_inv_cmd);
> -               } else {
> -                       r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd);
> +               if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
> +                       flags |= S_0085F0_SO0_DEST_BASE_ENA(1) |
> +                                S_0085F0_SO1_DEST_BASE_ENA(1) |
> +                                S_0085F0_SO2_DEST_BASE_ENA(1) |
> +                                S_0085F0_SO3_DEST_BASE_ENA(1) |
> +                                S_0085F0_SMX_ACTION_ENA(1);
> +
> +                       /* RV670 errata */
> +                       if (rctx->family == CHIP_RV670 ||
> +                           rctx->family == CHIP_RS780 ||
> +                           rctx->family == CHIP_RS880) {
> +                               flags |= S_0085F0_DEST_BASE_0_ENA(1);
> +                       }
>                 }
> +
> +               flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) |
> +                                                                S_0085F0_DB_DEST_BASE_ENA(1): 0;
> +               flags |= (rctx->flags & R600_CONTEXT_SHADERCONST_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0;
> +               flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0;
> +
> +               cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
> +               cs->buf[cs->cdw++] = flags;           /* CP_COHER_CNTL */
> +               cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
> +               cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
> +               cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
> +       }
> +
> +       if (rctx->flags & R600_CONTEXT_WAIT_IDLE) {
> +               /* wait for things to settle */
> +               r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
>         }
>
> -       ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
> +       /* everything is properly flushed */
> +       rctx->flags = 0;
>  }
>
>  void r600_context_flush(struct r600_context *ctx, unsigned flags)
> @@ -937,10 +1008,18 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
>                 streamout_suspended = true;
>         }
>
> -       r600_flush_framebuffer(ctx, true);
> -
>         /* partial flush is needed to avoid lockups on some chips with user fences */
> -       r600_context_ps_partial_flush(ctx);
> +       ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
> +
> +       /* flush the framebuffer */
> +       ctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_DB_FLUSH;
> +
> +       /* R6xx errata */
> +       if (ctx->chip_class == R600) {
> +               ctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
> +       }
> +
> +       r600_flush_emit(ctx);
>
>         /* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
>         if (ctx->chip_class <= R700) {
> @@ -959,10 +1038,6 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
>         /* Begin a new CS. */
>         r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
>
> -       /* Invalidate caches. */
> -       r600_inval_texture_cache(ctx);
> -       r600_flush_framebuffer(ctx, false);
> -
>         /* Re-emit states. */
>         r600_atom_dirty(ctx, &ctx->alphatest_state.atom);
>         r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
> @@ -1024,7 +1099,10 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
>         va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
>         va = va + (offset << 2);
>
> -       r600_context_ps_partial_flush(ctx);
> +       ctx->flags &= ~R600_CONTEXT_PS_PARTIAL_FLUSH;
> +       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> +       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
> +
>         cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
>         cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
>         cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
> @@ -1185,7 +1263,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
>  {
>         struct radeon_winsys_cs *cs = ctx->cs;
>         struct r600_so_target **t = ctx->so_targets;
> -       unsigned i, flush_flags = 0;
> +       unsigned i;
>         uint64_t va;
>
>         if (ctx->chip_class >= EVERGREEN) {
> @@ -1212,7 +1290,6 @@ void r600_context_streamout_end(struct r600_context *ctx)
>                                 r600_context_bo_reloc(ctx,  t[i]->filled_size,
>                                                       RADEON_USAGE_WRITE);
>
> -                       flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
>                 }
>         }
>
> @@ -1221,22 +1298,11 @@ void r600_context_streamout_end(struct r600_context *ctx)
>         } else {
>                 r600_set_streamout_enable(ctx, 0);
>         }
> +       ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
>
> -       /* This is needed to fix cache flushes on r600. */
> +       /* R6xx errata */
>         if (ctx->chip_class == R600) {
> -               if (ctx->family == CHIP_RV670 ||
> -                   ctx->family == CHIP_RS780 ||
> -                   ctx->family == CHIP_RS880) {
> -                       flush_flags |= S_0085F0_DEST_BASE_0_ENA(1);
> -               }
> -
> -               r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd);
> +               ctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
>         }
> -
> -       /* Flush streamout caches. */
> -       ctx->surface_sync_cmd.flush_flags |=
> -               S_0085F0_SMX_ACTION_ENA(1) | flush_flags;
> -       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
> -
>         ctx->num_cs_dw_streamout_end = 0;
>  }
> diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
> index 8d0ebc9..73500c7 100644
> --- a/src/gallium/drivers/r600/r600_hw_context_priv.h
> +++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
> @@ -28,7 +28,8 @@
>
>  #include "r600_pipe.h"
>
> -#define R600_MAX_DRAW_CS_DWORDS 16
> +/* the number of CS dwords for flushing and drawing */
> +#define R600_MAX_DRAW_CS_DWORDS 64
>
>  /* these flags are used in register flags and added into block flags */
>  #define REG_FLAG_NEED_BO 1
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index 9e6c28d..d0dd4d5 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -242,8 +242,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>         rctx->context.create_video_decoder = vl_create_decoder;
>         rctx->context.create_video_buffer = vl_video_buffer_create;
>
> -       r600_init_common_atoms(rctx);
> -
>         switch (rctx->chip_class) {
>         case R600:
>         case R700:
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index ff720e9..8ce8532 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -370,8 +370,6 @@ struct r600_context {
>         /** Compute specific registers initializations.  The start_cs_cmd atom
>          *  must be emitted before start_compute_cs_cmd. */
>          struct r600_command_buffer      start_compute_cs_cmd;
> -       struct r600_surface_sync_cmd    surface_sync_cmd;
> -       struct r600_atom                r6xx_flush_and_inv_cmd;
>         struct r600_alphatest_state     alphatest_state;
>         struct r600_cb_misc_state       cb_misc_state;
>         struct r600_db_misc_state       db_misc_state;
> @@ -575,8 +573,6 @@ void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom
>  void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
>                     void (*emit)(struct r600_context *ctx, struct r600_atom *state),
>                     unsigned num_dw);
> -void r600_init_common_atoms(struct r600_context *rctx);
> -unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
>  void r600_texture_barrier(struct pipe_context *ctx);
>  void r600_set_index_buffer(struct pipe_context *ctx,
>                            const struct pipe_index_buffer *ib);
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index ccafdc6..7dd4148 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -1600,13 +1600,23 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>         if (rstate == NULL)
>                 return;
>
> -       r600_flush_framebuffer(rctx, false);
> +       if (rctx->framebuffer.nr_cbufs) {
> +               rctx->flags |= R600_CONTEXT_CB_FLUSH;
> +       }
> +       if (rctx->framebuffer.zsbuf) {
> +               rctx->flags |= R600_CONTEXT_DB_FLUSH;
> +       }
> +       /* R6xx errata */
> +       if (rctx->chip_class == R600) {
> +               rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
> +       }
>
>         /* unreference old buffer and reference new one */
>         rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
>
>         util_copy_framebuffer_state(&rctx->framebuffer, state);
>
> +
>         /* Colorbuffers. */
>         rctx->export_16bpc = true;
>         rctx->nr_cbufs = state->nr_cbufs;
> @@ -2125,14 +2135,7 @@ void r600_adjust_gprs(struct r600_context *rctx)
>         unsigned tmp;
>         int diff;
>
> -       /* XXX: Following call moved from r600_bind_[ps|vs]_shader,
> -        * it seems eg+ doesn't need it, r6xx/7xx probably need it only for
> -        * adjusting the GPR allocation?
> -        * Do we need this if we aren't really changing config below? */
> -       r600_inval_shader_cache(rctx);
> -
> -       if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs)
> -       {
> +       if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) {
>                 diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs;
>                 num_vs_gprs -= diff;
>                 num_ps_gprs += diff;
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index e67eba8..b94d1b2 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -56,27 +56,6 @@ void r600_release_command_buffer(struct r600_command_buffer *cb)
>         FREE(cb->buf);
>  }
>
> -static void r600_emit_surface_sync(struct r600_context *rctx, struct r600_atom *atom)
> -{
> -       struct radeon_winsys_cs *cs = rctx->cs;
> -       struct r600_surface_sync_cmd *a = (struct r600_surface_sync_cmd*)atom;
> -
> -       cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
> -       cs->buf[cs->cdw++] = a->flush_flags;  /* CP_COHER_CNTL */
> -       cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
> -       cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
> -       cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
> -
> -       a->flush_flags = 0;
> -}
> -
> -static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct r600_atom *atom)
> -{
> -       struct radeon_winsys_cs *cs = rctx->cs;
> -       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
> -       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
> -}
> -
>  void r600_init_atom(struct r600_context *rctx,
>                     struct r600_atom *atom,
>                     unsigned id,
> @@ -108,37 +87,16 @@ void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom
>         r600_write_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref);
>  }
>
> -void r600_init_common_atoms(struct r600_context *rctx)
> -{
> -       r600_init_atom(rctx, &rctx->r6xx_flush_and_inv_cmd, 2, r600_emit_r6xx_flush_and_inv, 2);
> -       r600_init_atom(rctx, &rctx->surface_sync_cmd.atom, 3, r600_emit_surface_sync, 5);
> -}
> -
> -unsigned r600_get_cb_flush_flags(struct r600_context *rctx)
> -{
> -       unsigned flags = 0;
> -
> -       if (rctx->framebuffer.nr_cbufs) {
> -               flags |= S_0085F0_CB_ACTION_ENA(1) |
> -                        (((1 << rctx->framebuffer.nr_cbufs) - 1) << S_0085F0_CB0_DEST_BASE_ENA_SHIFT);
> -       }
> -
> -       /* Workaround for broken flushing on some R6xx chipsets. */
> -       if (rctx->family == CHIP_RV670 ||
> -           rctx->family == CHIP_RS780 ||
> -           rctx->family == CHIP_RS880) {
> -               flags |=  S_0085F0_CB1_DEST_BASE_ENA(1) |
> -                         S_0085F0_DEST_BASE_0_ENA(1);
> -       }
> -       return flags;
> -}
> -
>  void r600_texture_barrier(struct pipe_context *ctx)
>  {
>         struct r600_context *rctx = (struct r600_context *)ctx;
>
> -       rctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1) | r600_get_cb_flush_flags(rctx);
> -       r600_atom_dirty(rctx, &rctx->surface_sync_cmd.atom);
> +       rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_TEX_FLUSH;
> +
> +       /* R6xx errata */
> +       if (rctx->chip_class == R600) {
> +               rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
> +       }
>  }
>
>  static bool r600_conv_pipe_prim(unsigned pprim, unsigned *prim)
> @@ -424,7 +382,7 @@ static void r600_bind_samplers(struct pipe_context *pipe,
>                 }
>                 if (sampler->border_color_use) {
>                         dst->atom_sampler.num_dw += 11;
> -                       rctx->flags |= R600_PARTIAL_FLUSH;
> +                       rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
>                 } else {
>                         dst->atom_sampler.num_dw += 5;
>                 }
> @@ -432,7 +390,7 @@ static void r600_bind_samplers(struct pipe_context *pipe,
>         }
>         if (rctx->chip_class <= R700 && seamless_cube_map != -1 && seamless_cube_map != rctx->seamless_cube_map.enabled) {
>                 /* change in TA_CNTL_AUX need a pipeline flush */
> -               rctx->flags |= R600_PARTIAL_FLUSH;
> +               rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
>                 rctx->seamless_cube_map.enabled = seamless_cube_map;
>                 r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
>         }
> @@ -477,8 +435,6 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
>
>         rctx->vertex_elements = v;
>         if (v) {
> -               r600_inval_shader_cache(rctx);
> -
>                 rctx->states[v->rstate.id] = &v->rstate;
>                 r600_context_pipe_state_set(rctx, &v->rstate);
>         }
> @@ -515,7 +471,7 @@ void r600_set_index_buffer(struct pipe_context *ctx,
>  void r600_vertex_buffers_dirty(struct r600_context *rctx)
>  {
>         if (rctx->vertex_buffer_state.dirty_mask) {
> -               r600_inval_vertex_cache(rctx);
> +               rctx->flags |= rctx->has_vertex_cache ? R600_CONTEXT_VTX_FLUSH : R600_CONTEXT_TEX_FLUSH;
>                 rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
>                                                util_bitcount(rctx->vertex_buffer_state.dirty_mask);
>                 r600_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
> @@ -570,7 +526,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
>                               struct r600_samplerview_state *state)
>  {
>         if (state->dirty_mask) {
> -               r600_inval_texture_cache(rctx);
> +               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
>                 state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 14 : 13) *
>                                      util_bitcount(state->dirty_mask);
>                 r600_atom_dirty(rctx, &state->atom);
> @@ -898,7 +854,7 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
>  void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
>  {
>         if (state->dirty_mask) {
> -               r600_inval_shader_cache(rctx);
> +               rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
>                 state->atom.num_dw = rctx->chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
>                                                                    : util_bitcount(state->dirty_mask)*19;
>                 r600_atom_dirty(rctx, &state->atom);
> @@ -1148,13 +1104,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
>
>         r600_update_derived_state(rctx);
>
> -       /* partial flush triggered by border color change */
> -       if (rctx->flags & R600_PARTIAL_FLUSH) {
> -               rctx->flags &= ~R600_PARTIAL_FLUSH;
> -               r600_write_value(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
> -               r600_write_value(cs, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
> -       }
> -
>         if (info.indexed) {
>                 /* Initialize the index buffer struct. */
>                 pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
> @@ -1221,6 +1170,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
>
>         /* Emit states (the function expects that we emit at most 17 dwords here). */
>         r600_need_cs_space(rctx, 0, TRUE);
> +       r600_flush_emit(rctx);
>
>         for (i = 0; i < R600_MAX_ATOM; i++) {
>                 if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) {
> @@ -1275,8 +1225,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
>                                         (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
>         }
>
> -       rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING;
> -
>         /* Set the depth buffer as dirty. */
>         if (rctx->framebuffer.zsbuf) {
>                 struct pipe_surface *surf = rctx->framebuffer.zsbuf;
> diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
> index 28423e1..4bd7716 100644
> --- a/src/gallium/drivers/r600/r600d.h
> +++ b/src/gallium/drivers/r600/r600d.h
> @@ -3341,9 +3341,21 @@
>  #define   S_0085F0_DB_DEST_BASE_ENA(x)                 (((x) & 0x1) << 14)
>  #define   G_0085F0_DB_DEST_BASE_ENA(x)                 (((x) >> 14) & 0x1)
>  #define   C_0085F0_DB_DEST_BASE_ENA                    0xFFFFBFFF
> +/* r600 only start */
>  #define   S_0085F0_CR_DEST_BASE_ENA(x)                 (((x) & 0x1) << 15)
>  #define   G_0085F0_CR_DEST_BASE_ENA(x)                 (((x) >> 15) & 0x1)
>  #define   C_0085F0_CR_DEST_BASE_ENA                    0xFFFF7FFF
> +/* r600 only end */
> +/* evergreen only start */
> +#define   S_0085F0_CB8_DEST_BASE_ENA(x)                (((x) & 0x1) << 15)
> +#define   G_0085F0_CB8_DEST_BASE_ENA(x)                (((x) >> 15) & 0x1)
> +#define   S_0085F0_CB9_DEST_BASE_ENA(x)                (((x) & 0x1) << 16)
> +#define   G_0085F0_CB9_DEST_BASE_ENA(x)                (((x) >> 16) & 0x1)
> +#define   S_0085F0_CB10_DEST_BASE_ENA(x)               (((x) & 0x1) << 17)
> +#define   G_0085F0_CB10_DEST_BASE_ENA(x)               (((x) >> 17) & 0x1)
> +#define   S_0085F0_CB11_DEST_BASE_ENA(x)               (((x) & 0x1) << 18)
> +#define   G_0085F0_CB11_DEST_BASE_ENA(x)               (((x) >> 18) & 0x1)
> +/* evergreen only end */
>  #define   S_0085F0_TC_ACTION_ENA(x)                    (((x) & 0x1) << 23)
>  #define   G_0085F0_TC_ACTION_ENA(x)                    (((x) >> 23) & 0x1)
>  #define   C_0085F0_TC_ACTION_ENA                       0xFF7FFFFF
> --
> 1.7.9.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list