[Mesa-dev] [PATCH] r600g: add cs memory usage accounting and limit it

Marek Olšák maraeo at gmail.com
Thu Jan 31 08:35:53 PST 2013


On Thu, Jan 31, 2013 at 4:29 PM, Jerome Glisse <j.glisse at gmail.com> wrote:
> On Wed, Jan 30, 2013 at 10:35 PM, Marek Olšák <maraeo at gmail.com> wrote:
>> On Wed, Jan 30, 2013 at 6:14 PM,  <j.glisse at gmail.com> wrote:
>>> From: Jerome Glisse <jglisse at redhat.com>
>>>
>>> We are now seing cs that can go over the vram+gtt size to avoid
>>> failing flush early cs that goes over 70% (gtt+vram) usage. 70%
>>> is use to allow some fragmentation.
>>>
>>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>>> ---
>>>  src/gallium/drivers/r600/evergreen_state.c    |  4 ++++
>>>  src/gallium/drivers/r600/r600.h               |  1 +
>>>  src/gallium/drivers/r600/r600_buffer.c        |  1 +
>>>  src/gallium/drivers/r600/r600_hw_context.c    | 12 ++++++++++++
>>>  src/gallium/drivers/r600/r600_pipe.c          |  3 +++
>>>  src/gallium/drivers/r600/r600_pipe.h          | 21 +++++++++++++++++++++
>>>  src/gallium/drivers/r600/r600_state.c         |  3 +++
>>>  src/gallium/drivers/r600/r600_state_common.c  | 17 ++++++++++++++++-
>>>  src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++++++++++
>>>  src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++++++++++
>>>  10 files changed, 82 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
>>> index be1c427..84f8dce 100644
>>> --- a/src/gallium/drivers/r600/evergreen_state.c
>>> +++ b/src/gallium/drivers/r600/evergreen_state.c
>>> @@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>>>                 surf = (struct r600_surface*)state->cbufs[i];
>>>                 rtex = (struct r600_texture*)surf->base.texture;
>>>
>>> +               r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
>>> +
>>>                 if (!surf->color_initialized) {
>>>                         evergreen_init_color_surface(rctx, surf);
>>>                 }
>>> @@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
>>>         if (state->zsbuf) {
>>>                 surf = (struct r600_surface*)state->zsbuf;
>>>
>>> +               r600_context_add_resource_size(ctx, state->zsbuf->texture);
>>> +
>>>                 if (!surf->depth_initialized) {
>>>                         evergreen_init_depth_surface(rctx, surf);
>>>                 }
>>> diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
>>> index a383c90..b9f7d3d 100644
>>> --- a/src/gallium/drivers/r600/r600.h
>>> +++ b/src/gallium/drivers/r600/r600.h
>>> @@ -50,6 +50,7 @@ struct r600_resource {
>>>
>>>         /* Resource state. */
>>>         unsigned                        domains;
>>> +       uint64_t                        size;
>>
>> Don't add this. Use r600_resource::buf::size instead, which is already
>> initialized.
>>
>>
>>>  };
>>>
>>>  #define R600_BLOCK_MAX_BO              32
>>> diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
>>> index 6df0d91..92f549a 100644
>>> --- a/src/gallium/drivers/r600/r600_buffer.c
>>> +++ b/src/gallium/drivers/r600/r600_buffer.c
>>> @@ -250,6 +250,7 @@ bool r600_init_resource(struct r600_screen *rscreen,
>>>                 break;
>>>         }
>>>
>>> +       res->size = size;
>>>         res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
>>>                                                use_reusable_pool,
>>>                                                initial_domain);
>>> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
>>> index ebafd97..44d3b4d 100644
>>> --- a/src/gallium/drivers/r600/r600_hw_context.c
>>> +++ b/src/gallium/drivers/r600/r600_hw_context.c
>>> @@ -359,6 +359,16 @@ out_err:
>>>  void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>>>                         boolean count_draw_in)
>>>  {
>>> +       if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, ctx->gtt)) {
>>> +               ctx->gtt = 0;
>>> +               ctx->vram = 0;
>>> +               ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
>>> +               return;
>>> +       }
>>> +       /* all will be accounted once relocation are emited */
>>> +       ctx->gtt = 0;
>>> +       ctx->vram = 0;
>>> +
>>>         /* The number of dwords we already used in the CS so far. */
>>>         num_dw += ctx->rings.gfx.cs->cdw;
>>>
>>> @@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx)
>>>
>>>         ctx->pm4_dirty_cdwords = 0;
>>>         ctx->flags = 0;
>>> +       ctx->gtt = 0;
>>> +       ctx->vram = 0;
>>>
>>>         /* Begin a new CS. */
>>>         r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd);
>>> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
>>> index a59578d..cb50cfe 100644
>>> --- a/src/gallium/drivers/r600/r600_pipe.c
>>> +++ b/src/gallium/drivers/r600/r600_pipe.c
>>> @@ -333,6 +333,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>>>         rctx->chip_class = rscreen->chip_class;
>>>         rctx->keep_tiling_flags = rscreen->info.drm_minor >= 12;
>>>
>>> +       rctx->gtt = 0;
>>> +       rctx->vram = 0;
>>
>> There is no reason to initialize anything to 0 in context_create. The
>> whole context structure is calloc'd.
>>
>>
>>> +
>>>         LIST_INITHEAD(&rctx->active_nontimer_queries);
>>>         LIST_INITHEAD(&rctx->dirty);
>>>         LIST_INITHEAD(&rctx->enable_list);
>>> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
>>> index 3ff42d3..beb4b33 100644
>>> --- a/src/gallium/drivers/r600/r600_pipe.h
>>> +++ b/src/gallium/drivers/r600/r600_pipe.h
>>> @@ -447,6 +447,10 @@ struct r600_context {
>>>         unsigned                        backend_mask;
>>>         unsigned                        max_db; /* for OQ */
>>>
>>> +       /* current unaccounted memory usage */
>>> +       uint64_t                        vram;
>>> +       uint64_t                        gtt;
>>> +
>>>         /* Miscellaneous state objects. */
>>>         void                            *custom_dsa_flush;
>>>         void                            *custom_blend_resolve;
>>> @@ -998,4 +1002,21 @@ static INLINE unsigned u_max_layer(struct pipe_resource *r, unsigned level)
>>>         }
>>>  }
>>>
>>> +static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
>>> +{
>>> +       struct r600_context *rctx = (struct r600_context *)ctx;
>>> +       struct r600_resource *rr = (struct r600_resource *)r;
>>> +
>>> +       if (r == NULL) {
>>> +               return;
>>> +       }
>>
>> Do you really need to check for NULL here? An assertion would be sufficient.
>>
>> We can also skip this if the buffer is already in the relocation list.
>> We have a query for that: rctx->ws->cs_is_buffer_referenced.
>
> Yes there is some case where i was getting null at some point. I don't
> want to have a precise accounting but rather a good estimate of it and
> i don't want to waste cpu cycle checking if a buffer is already in
> relocation list. More over at each draw time the counter are reseted
> and the value computed by the relocation process is taken into account
> so often the estimation is pretty good. In my testing (where i over
> allocate by a random M amount each buffer) the margin in various gl
> (heaven, reaction, xonotic, unreal, vdrift, ....) was always in 10% of
> the limit (below or above), given there is room for fragmentation it
> was working ok.
>
> So i think this gave a very good estimate and a quick one.
>
>>
>>> +
>>> +       if (rr->domains & RADEON_DOMAIN_GTT) {
>>> +               rctx->gtt += rr->size;
>>> +       }
>>> +       if (rr->domains & RADEON_DOMAIN_VRAM) {
>>> +               rctx->vram += rr->size;
>>> +       }
>>> +}
>>> +
>>>  #endif
>>> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
>>> index 6b4b2c3..58b5aaa 100644
>>> --- a/src/gallium/drivers/r600/r600_state.c
>>> +++ b/src/gallium/drivers/r600/r600_state.c
>>> @@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>>>
>>>                 surf = (struct r600_surface*)state->cbufs[i];
>>>                 rtex = (struct r600_texture*)surf->base.texture;
>>> +               r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
>>>
>>>                 if (!surf->color_initialized || force_cmask_fmask) {
>>>                         r600_init_color_surface(rctx, surf, force_cmask_fmask);
>>> @@ -1576,6 +1577,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
>>>         if (state->zsbuf) {
>>>                 surf = (struct r600_surface*)state->zsbuf;
>>>
>>> +               r600_context_add_resource_size(ctx, state->zsbuf->texture);
>>> +
>>>                 if (!surf->depth_initialized) {
>>>                         r600_init_depth_surface(rctx, surf);
>>>                 }
>>> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
>>> index 9386f61..fce2537 100644
>>> --- a/src/gallium/drivers/r600/r600_state_common.c
>>> +++ b/src/gallium/drivers/r600/r600_state_common.c
>>> @@ -479,7 +479,8 @@ static void r600_set_index_buffer(struct pipe_context *ctx,
>>>
>>>         if (ib) {
>>>                 pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
>>> -               memcpy(&rctx->index_buffer, ib, sizeof(*ib));
>>> +               memcpy(&rctx->index_buffer, ib, sizeof(*ib));
>>> +               r600_context_add_resource_size(ctx, ib->buffer);
>>>         } else {
>>>                 pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
>>>         }
>>> @@ -516,6 +517,7 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
>>>                                         vb[i].buffer_offset = input[i].buffer_offset;
>>>                                         pipe_resource_reference(&vb[i].buffer, input[i].buffer);
>>>                                         new_buffer_mask |= 1 << i;
>>> +                                       r600_context_add_resource_size(ctx, input[i].buffer);
>>>                                 } else {
>>>                                         pipe_resource_reference(&vb[i].buffer, NULL);
>>>                                         disable_mask |= 1 << i;
>>> @@ -613,6 +615,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
>>>
>>>                         pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
>>>                         new_mask |= 1 << i;
>>> +                       r600_context_add_resource_size(pipe, views[i]->texture);
>>>                 } else {
>>>                         pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
>>>                         disable_mask |= 1 << i;
>>> @@ -806,6 +809,10 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
>>>         rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
>>>         r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);
>>>
>>> +       if (rctx->ps_shader->current) {
>>
>> This conditional is useless. rctx->ps_shader->current is never NULL at
>> this point.
>>
>>
>>> +               r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
>>> +       }
>>> +
>>>         if (rctx->chip_class <= R700) {
>>>                 bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;
>>>
>>> @@ -835,6 +842,10 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
>>>         if (state) {
>>>                 r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);
>>>
>>> +               if (rctx->ps_shader->current) {
>>> +                       r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
>>> +               }
>>
>> Maybe you wanted to use vs_shader instead? And vs_shader->current is
>> never NULL here too.
>>
>>
>>> +
>>>                 /* Update clip misc state. */
>>>                 if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
>>>                     rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
>>> @@ -938,10 +949,13 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
>>>                 } else {
>>>                         u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
>>>                 }
>>> +               /* account it in gtt */
>>> +               rctx->gtt += input->buffer_size;
>>
>> Are you sure about this? The buffer is usually much larger than
>> input->buffer_size and the kernel only cares about the whole buffer.
>>
>> The rest looks good.
>>
>> Marek
>
> As stated above i am looking at an estimate and each draw call will
> account for the proper index buffer size so in the end my testing
> showed i always had a very close estimate.

Sounds good.

Some comment in the code wouldn't hurt, so that other people don't
consider it as a bug.

Marek


More information about the mesa-dev mailing list