[Mesa-dev] [PATCH] r600g: add cs memory usage accounting and limit it v2

Marek Olšák maraeo at gmail.com
Thu Jan 31 08:37:26 PST 2013


Reviewed-by: Marek Olšák <maraeo at gmail.com>

Marek

On Thu, Jan 31, 2013 at 4:38 PM,  <j.glisse at gmail.com> wrote:
> From: Jerome Glisse <jglisse at redhat.com>
>
> We are now seeing command streams (cs) that can go over the vram+gtt
> size. To avoid failing, flush early any cs that goes over 70%
> (gtt+vram) usage. 70% is used to allow for some fragmentation.
>
> The idea is to compute a rough estimate of the memory requirement of
> each draw call. After each draw call, memory will be precisely
> accounted, so the uncertainty is only on the current draw call.
> In practice this gives a very good estimate (+/- 10% of the target
> memory limit).
>
> v2: Remove leftovers from the testing version, remove useless NULL
>     checks. Improve commit message.
>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
> ---
>  src/gallium/drivers/r600/evergreen_state.c    |  4 ++++
>  src/gallium/drivers/r600/r600_hw_context.c    | 12 ++++++++++++
>  src/gallium/drivers/r600/r600_pipe.h          | 21 +++++++++++++++++++++
>  src/gallium/drivers/r600/r600_state.c         |  3 +++
>  src/gallium/drivers/r600/r600_state_common.c  | 13 ++++++++++++-
>  src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++++++++++
>  src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++++++++++
>  7 files changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index 0a3861f..5dd8b13 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>                 surf = (struct r600_surface*)state->cbufs[i];
>                 rtex = (struct r600_texture*)surf->base.texture;
>
> +               r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
> +
>                 if (!surf->color_initialized) {
>                         evergreen_init_color_surface(rctx, surf);
>                 }
> @@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>         if (state->zsbuf) {
>                 surf = (struct r600_surface*)state->zsbuf;
>
> +               r600_context_add_resource_size(ctx, state->zsbuf->texture);
> +
>                 if (!surf->depth_initialized) {
>                         evergreen_init_depth_surface(rctx, surf);
>                 }
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index 23f488a..a89f230 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -359,6 +359,16 @@ out_err:
>  void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>                         boolean count_draw_in)
>  {
> +       if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, ctx->gtt)) {
> +               ctx->gtt = 0;
> +               ctx->vram = 0;
> +               ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
> +               return;
> +       }
> +       /* all will be accounted once relocation are emited */
> +       ctx->gtt = 0;
> +       ctx->vram = 0;
> +
>         /* The number of dwords we already used in the CS so far. */
>         num_dw += ctx->rings.gfx.cs->cdw;
>
> @@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx)
>
>         ctx->pm4_dirty_cdwords = 0;
>         ctx->flags = 0;
> +       ctx->gtt = 0;
> +       ctx->vram = 0;
>
>         /* Begin a new CS. */
>         r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd);
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 3ff42d3..42b4e7c 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -447,6 +447,10 @@ struct r600_context {
>         unsigned                        backend_mask;
>         unsigned                        max_db; /* for OQ */
>
> +       /* current unaccounted memory usage */
> +       uint64_t                        vram;
> +       uint64_t                        gtt;
> +
>         /* Miscellaneous state objects. */
>         void                            *custom_dsa_flush;
>         void                            *custom_blend_resolve;
> @@ -998,4 +1002,21 @@ static INLINE unsigned u_max_layer(struct pipe_resource *r, unsigned level)
>         }
>  }
>
> +static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
> +{
> +       struct r600_context *rctx = (struct r600_context *)ctx;
> +       struct r600_resource *rr = (struct r600_resource *)r;
> +
> +       if (r == NULL) {
> +               return;
> +       }
> +
> +       if (rr->domains & RADEON_DOMAIN_GTT) {
> +               rctx->gtt += rr->buf->size;
> +       }
> +       if (rr->domains & RADEON_DOMAIN_VRAM) {
> +               rctx->vram += rr->buf->size;
> +       }
> +}
> +
>  #endif
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index c0bc2a5..44cd00e 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>
>                 surf = (struct r600_surface*)state->cbufs[i];
>                 rtex = (struct r600_texture*)surf->base.texture;
> +               r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
>
>                 if (!surf->color_initialized || force_cmask_fmask) {
>                         r600_init_color_surface(rctx, surf, force_cmask_fmask);
> @@ -1576,6 +1577,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>         if (state->zsbuf) {
>                 surf = (struct r600_surface*)state->zsbuf;
>
> +               r600_context_add_resource_size(ctx, state->zsbuf->texture);
> +
>                 if (!surf->depth_initialized) {
>                         r600_init_depth_surface(rctx, surf);
>                 }
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index 9386f61..33200a6 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -479,7 +479,8 @@ static void r600_set_index_buffer(struct pipe_context *ctx,
>
>         if (ib) {
>                 pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
> -               memcpy(&rctx->index_buffer, ib, sizeof(*ib));
> +               memcpy(&rctx->index_buffer, ib, sizeof(*ib));
> +               r600_context_add_resource_size(ctx, ib->buffer);
>         } else {
>                 pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
>         }
> @@ -516,6 +517,7 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
>                                         vb[i].buffer_offset = input[i].buffer_offset;
>                                         pipe_resource_reference(&vb[i].buffer, input[i].buffer);
>                                         new_buffer_mask |= 1 << i;
> +                                       r600_context_add_resource_size(ctx, input[i].buffer);
>                                 } else {
>                                         pipe_resource_reference(&vb[i].buffer, NULL);
>                                         disable_mask |= 1 << i;
> @@ -613,6 +615,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
>
>                         pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
>                         new_mask |= 1 << i;
> +                       r600_context_add_resource_size(pipe, views[i]->texture);
>                 } else {
>                         pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
>                         disable_mask |= 1 << i;
> @@ -806,6 +809,8 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
>         rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
>         r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
>
> +       r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
> +
>         if (rctx->chip_class <= R700) {
>                 bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;
>
> @@ -835,6 +840,8 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
>         if (state) {
>                 r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
>
> +               r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo);
> +
>                 /* Update clip misc state. */
>                 if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
>                     rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
> @@ -938,10 +945,13 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
>                 } else {
>                         u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
>                 }
> +               /* account it in gtt */
> +               rctx->gtt += input->buffer_size;
>         } else {
>                 /* Setup the hw buffer. */
>                 cb->buffer_offset = input->buffer_offset;
>                 pipe_resource_reference(&cb->buffer, input->buffer);
> +               r600_context_add_resource_size(ctx, input->buffer);
>         }
>
>         state->enabled_mask |= 1 << index;
> @@ -1004,6 +1014,7 @@ static void r600_set_so_targets(struct pipe_context *ctx,
>         /* Set the new targets. */
>         for (i = 0; i < num_targets; i++) {
>                 pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
> +               r600_context_add_resource_size(ctx, targets[i]->buffer);
>         }
>         for (; i < rctx->num_so_targets; i++) {
>                 pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> index cab2704..6a7115b 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> @@ -383,6 +383,16 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
>      return status;
>  }
>
> +static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
> +{
> +    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
> +    boolean status =
> +        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
> +        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
> +
> +    return status;
> +}
> +
>  static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
>                                        struct radeon_winsys_cs_handle *buf)
>  {
> @@ -575,6 +585,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
>      ws->base.cs_destroy = radeon_drm_cs_destroy;
>      ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
>      ws->base.cs_validate = radeon_drm_cs_validate;
> +    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
>      ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
>      ws->base.cs_flush = radeon_drm_cs_flush;
>      ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> index 7fdef3f..8b64ef2 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> @@ -393,6 +393,16 @@ struct radeon_winsys {
>      boolean (*cs_validate)(struct radeon_winsys_cs *cs);
>
>      /**
> +     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
> +     * added so far.
> +     *
> +     * \param cs        A command stream to validate.
> +     * \param vram      VRAM memory size pending to be use
> +     * \param gtt       GTT memory size pending to be use
> +     */
> +    boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
> +
> +    /**
>       * Write a relocated dword to a command buffer.
>       *
>       * \param cs        A command stream the relocation is written to.
> --
> 1.7.11.7
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list