[Mesa-dev] [PATCH] r600g: add cs memory usage accounting and limit it

Jerome Glisse j.glisse at gmail.com
Thu Jan 31 07:29:07 PST 2013


On Wed, Jan 30, 2013 at 10:35 PM, Marek Olšák <maraeo at gmail.com> wrote:
> On Wed, Jan 30, 2013 at 6:14 PM,  <j.glisse at gmail.com> wrote:
>> From: Jerome Glisse <jglisse at redhat.com>
>>
>> We are now seing cs that can go over the vram+gtt size to avoid
>> failing flush early cs that goes over 70% (gtt+vram) usage. 70%
>> is use to allow some fragmentation.
>>
>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>> ---
>>  src/gallium/drivers/r600/evergreen_state.c    |  4 ++++
>>  src/gallium/drivers/r600/r600.h               |  1 +
>>  src/gallium/drivers/r600/r600_buffer.c        |  1 +
>>  src/gallium/drivers/r600/r600_hw_context.c    | 12 ++++++++++++
>>  src/gallium/drivers/r600/r600_pipe.c          |  3 +++
>>  src/gallium/drivers/r600/r600_pipe.h          | 21 +++++++++++++++++++++
>>  src/gallium/drivers/r600/r600_state.c         |  3 +++
>>  src/gallium/drivers/r600/r600_state_common.c  | 17 ++++++++++++++++-
>>  src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++++++++++
>>  src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++++++++++
>>  10 files changed, 82 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
>> index be1c427..84f8dce 100644
>> --- a/src/gallium/drivers/r600/evergreen_state.c
>> +++ b/src/gallium/drivers/r600/evergreen_state.c
>> @@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>>                 surf = (struct r600_surface*)state->cbufs[i];
>>                 rtex = (struct r600_texture*)surf->base.texture;
>>
>> +               r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
>> +
>>                 if (!surf->color_initialized) {
>>                         evergreen_init_color_surface(rctx, surf);
>>                 }
>> @@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>>         if (state->zsbuf) {
>>                 surf = (struct r600_surface*)state->zsbuf;
>>
>> +               r600_context_add_resource_size(ctx, state->zsbuf->texture);
>> +
>>                 if (!surf->depth_initialized) {
>>                         evergreen_init_depth_surface(rctx, surf);
>>                 }
>> diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
>> index a383c90..b9f7d3d 100644
>> --- a/src/gallium/drivers/r600/r600.h
>> +++ b/src/gallium/drivers/r600/r600.h
>> @@ -50,6 +50,7 @@ struct r600_resource {
>>
>>         /* Resource state. */
>>         unsigned                        domains;
>> +       uint64_t                        size;
>
> Don't add this. Use r600_resource::buf::size instead, which is already
> initialized.
>
>
>>  };
>>
>>  #define R600_BLOCK_MAX_BO              32
>> diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
>> index 6df0d91..92f549a 100644
>> --- a/src/gallium/drivers/r600/r600_buffer.c
>> +++ b/src/gallium/drivers/r600/r600_buffer.c
>> @@ -250,6 +250,7 @@ bool r600_init_resource(struct r600_screen *rscreen,
>>                 break;
>>         }
>>
>> +       res->size = size;
>>         res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
>>                                                use_reusable_pool,
>>                                                initial_domain);
>> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
>> index ebafd97..44d3b4d 100644
>> --- a/src/gallium/drivers/r600/r600_hw_context.c
>> +++ b/src/gallium/drivers/r600/r600_hw_context.c
>> @@ -359,6 +359,16 @@ out_err:
>>  void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>>                         boolean count_draw_in)
>>  {
>> +       if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, ctx->gtt)) {
>> +               ctx->gtt = 0;
>> +               ctx->vram = 0;
>> +               ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
>> +               return;
>> +       }
>> +       /* all will be accounted once relocation are emited */
>> +       ctx->gtt = 0;
>> +       ctx->vram = 0;
>> +
>>         /* The number of dwords we already used in the CS so far. */
>>         num_dw += ctx->rings.gfx.cs->cdw;
>>
>> @@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx)
>>
>>         ctx->pm4_dirty_cdwords = 0;
>>         ctx->flags = 0;
>> +       ctx->gtt = 0;
>> +       ctx->vram = 0;
>>
>>         /* Begin a new CS. */
>>         r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd);
>> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
>> index a59578d..cb50cfe 100644
>> --- a/src/gallium/drivers/r600/r600_pipe.c
>> +++ b/src/gallium/drivers/r600/r600_pipe.c
>> @@ -333,6 +333,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>>         rctx->chip_class = rscreen->chip_class;
>>         rctx->keep_tiling_flags = rscreen->info.drm_minor >= 12;
>>
>> +       rctx->gtt = 0;
>> +       rctx->vram = 0;
>
> There is no reason to initialize anything to 0 in context_create. The
> whole context structure is calloc'd.
>
>
>> +
>>         LIST_INITHEAD(&rctx->active_nontimer_queries);
>>         LIST_INITHEAD(&rctx->dirty);
>>         LIST_INITHEAD(&rctx->enable_list);
>> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
>> index 3ff42d3..beb4b33 100644
>> --- a/src/gallium/drivers/r600/r600_pipe.h
>> +++ b/src/gallium/drivers/r600/r600_pipe.h
>> @@ -447,6 +447,10 @@ struct r600_context {
>>         unsigned                        backend_mask;
>>         unsigned                        max_db; /* for OQ */
>>
>> +       /* current unaccounted memory usage */
>> +       uint64_t                        vram;
>> +       uint64_t                        gtt;
>> +
>>         /* Miscellaneous state objects. */
>>         void                            *custom_dsa_flush;
>>         void                            *custom_blend_resolve;
>> @@ -998,4 +1002,21 @@ static INLINE unsigned u_max_layer(struct pipe_resource *r, unsigned level)
>>         }
>>  }
>>
>> +static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
>> +{
>> +       struct r600_context *rctx = (struct r600_context *)ctx;
>> +       struct r600_resource *rr = (struct r600_resource *)r;
>> +
>> +       if (r == NULL) {
>> +               return;
>> +       }
>
> Do you really need to check for NULL here? An assertion would be sufficient.
>
> We can also skip this if the buffer is already in the relocation list.
> We have a query for that: rctx->ws->cs_is_buffer_referenced.

Yes there is some case where i was getting null at some point. I don't
want to have a precise accounting but rather a good estimate of it and
i don't want to waste cpu cycle checking if a buffer is already in
relocation list. More over at each draw time the counter are reseted
and the value computed by the relocation process is taken into account
so often the estimation is pretty good. In my testing (where i over
allocate by a random M amount each buffer) the margin in various gl
(heaven, reaction, xonotic, unreal, vdrift, ....) was always in 10% of
the limit (below or above), given there is room for fragmentation it
was working ok.

So i think this gave a very good estimate and a quick one.

>
>> +
>> +       if (rr->domains & RADEON_DOMAIN_GTT) {
>> +               rctx->gtt += rr->size;
>> +       }
>> +       if (rr->domains & RADEON_DOMAIN_VRAM) {
>> +               rctx->vram += rr->size;
>> +       }
>> +}
>> +
>>  #endif
>> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
>> index 6b4b2c3..58b5aaa 100644
>> --- a/src/gallium/drivers/r600/r600_state.c
>> +++ b/src/gallium/drivers/r600/r600_state.c
>> @@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>>
>>                 surf = (struct r600_surface*)state->cbufs[i];
>>                 rtex = (struct r600_texture*)surf->base.texture;
>> +               r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
>>
>>                 if (!surf->color_initialized || force_cmask_fmask) {
>>                         r600_init_color_surface(rctx, surf, force_cmask_fmask);
>> @@ -1576,6 +1577,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>>         if (state->zsbuf) {
>>                 surf = (struct r600_surface*)state->zsbuf;
>>
>> +               r600_context_add_resource_size(ctx, state->zsbuf->texture);
>> +
>>                 if (!surf->depth_initialized) {
>>                         r600_init_depth_surface(rctx, surf);
>>                 }
>> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
>> index 9386f61..fce2537 100644
>> --- a/src/gallium/drivers/r600/r600_state_common.c
>> +++ b/src/gallium/drivers/r600/r600_state_common.c
>> @@ -479,7 +479,8 @@ static void r600_set_index_buffer(struct pipe_context *ctx,
>>
>>         if (ib) {
>>                 pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
>> -               memcpy(&rctx->index_buffer, ib, sizeof(*ib));
>> +               memcpy(&rctx->index_buffer, ib, sizeof(*ib));
>> +               r600_context_add_resource_size(ctx, ib->buffer);
>>         } else {
>>                 pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
>>         }
>> @@ -516,6 +517,7 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
>>                                         vb[i].buffer_offset = input[i].buffer_offset;
>>                                         pipe_resource_reference(&vb[i].buffer, input[i].buffer);
>>                                         new_buffer_mask |= 1 << i;
>> +                                       r600_context_add_resource_size(ctx, input[i].buffer);
>>                                 } else {
>>                                         pipe_resource_reference(&vb[i].buffer, NULL);
>>                                         disable_mask |= 1 << i;
>> @@ -613,6 +615,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
>>
>>                         pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
>>                         new_mask |= 1 << i;
>> +                       r600_context_add_resource_size(pipe, views[i]->texture);
>>                 } else {
>>                         pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
>>                         disable_mask |= 1 << i;
>> @@ -806,6 +809,10 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
>>         rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
>>         r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
>>
>> +       if (rctx->ps_shader->current) {
>
> This conditional is useless. rctx->ps_shader->current is never NULL at
> this point.
>
>
>> +               r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
>> +       }
>> +
>>         if (rctx->chip_class <= R700) {
>>                 bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;
>>
>> @@ -835,6 +842,10 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
>>         if (state) {
>>                 r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
>>
>> +               if (rctx->ps_shader->current) {
>> +                       r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
>> +               }
>
> Maybe you wanted to use vs_shader instead? And vs_shader->current is
> never NULL here too.
>
>
>> +
>>                 /* Update clip misc state. */
>>                 if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
>>                     rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
>> @@ -938,10 +949,13 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
>>                 } else {
>>                         u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
>>                 }
>> +               /* account it in gtt */
>> +               rctx->gtt += input->buffer_size;
>
> Are you sure about this? The buffer is usually much larger than
> input->buffer_size and the kernel only cares about the whole buffer.
>
> The rest looks good.
>
> Marek

As stated above i am looking at an estimate and each draw call will
account for the proper index buffer size so in the end my testing
showed i always had a very close estimate.

Cheers,
Jerome


More information about the mesa-dev mailing list