[Mesa-dev] [PATCH 3/4] winsys/radeon: Keep bo statistics

Marek Olšák maraeo at gmail.com
Wed Jan 8 03:03:12 PST 2014


On Tue, Jan 7, 2014 at 7:14 PM, Lauri Kasanen <cand at gmx.com> wrote:
> These will be used later on for optimizing the VRAM placement.
>
> No measurable overhead (glxgears, torcs).
>
> v2: Get accurate stats by taking dirty_masks into account

Why don't you just set the statistics once per CS in
radeon_drm_cs_flush? I don't see a value in doing it in every function
that sets the resources.

Also, last_cpu_time will be wrong anyway, because the driver may call
the buffer_map function only once per buffer, e.g. at buffer creation.
It's not a rule, but it happens.

>
> Marek: I'm not sure I was testing torcs right; I started a race on Forza, drove around, and looked at the fps. Doesn't seem to do timedemos.

That's correct. I usually record FPS of the first few seconds of the game.

To answer your question from IRC... every frame is ended with a
context flush (called from SwapBuffers), so there is always at least
one CS per frame. The last context flush should have
PIPE_FLUSH_END_OF_FRAME set.

Marek

>
> Signed-off-by: Lauri Kasanen <cand at gmx.com>
> ---
>  src/gallium/drivers/r600/r600_state_common.c      | 23 +++++++++++++++++++++
>  src/gallium/winsys/radeon/drm/radeon_drm_bo.c     |  3 +++
>  src/gallium/winsys/radeon/drm/radeon_drm_bo.h     | 16 +++++++++++++++
>  src/gallium/winsys/radeon/drm/radeon_drm_cs.c     |  8 +-------
>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 25 +++++++++++++++++++++++
>  src/gallium/winsys/radeon/drm/radeon_winsys.h     | 11 ++++++++++
>  6 files changed, 79 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index 3dc7991..ba37af8 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -518,6 +518,7 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
>         struct pipe_vertex_buffer *vb = state->vb + start_slot;
>         unsigned i;
>         uint32_t disable_mask = 0;
> +       uint32_t skipped_mask = 0;
>         /* These are the new buffers set by this function. */
>         uint32_t new_buffer_mask = 0;
>
> @@ -553,6 +554,17 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
>         rctx->vertex_buffer_state.dirty_mask |= new_buffer_mask;
>
>         r600_vertex_buffers_dirty(rctx);
> +
> +       /* Update statistics for the skipped vertex buffers */
> +       skipped_mask = rctx->vertex_buffer_state.enabled_mask &
> +                               ~rctx->vertex_buffer_state.dirty_mask;
> +       while (skipped_mask) {
> +               struct r600_resource *rbuffer;
> +               i = u_bit_scan(&skipped_mask);
> +
> +               rbuffer = (struct r600_resource*)vb[i].buffer;
> +               rctx->b.ws->update_bo_stats(rctx->b.ws, rbuffer->cs_buf, RADEON_USAGE_READ);
> +       }
>  }
>
>  void r600_sampler_views_dirty(struct r600_context *rctx,
> @@ -575,6 +587,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
>         struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views;
>         uint32_t dirty_sampler_states_mask = 0;
>         unsigned i;
> +       uint32_t skipped_mask = 0;
>         /* This sets 1-bit for textures with index >= count. */
>         uint32_t disable_mask = ~((1ull << count) - 1);
>         /* These are the new textures set by this function. */
> @@ -654,6 +667,16 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
>                 dst->states.dirty_mask |= dirty_sampler_states_mask;
>                 r600_sampler_states_dirty(rctx, &dst->states);
>         }
> +
> +       /* Update statistics for the skipped samplers */
> +       skipped_mask = dst->views.enabled_mask & ~dst->views.dirty_mask;
> +       while (skipped_mask) {
> +               i = u_bit_scan(&skipped_mask);
> +
> +               rctx->b.ws->update_bo_stats(rctx->b.ws,
> +                                               rviews[i]->tex_resource->cs_buf,
> +                                               RADEON_USAGE_READ);
> +       }
>  }
>
>  static void r600_set_viewport_states(struct pipe_context *ctx,
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> index ea99351..606b65a 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> @@ -529,6 +529,9 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
>          fprintf(ws->bo_stats_file, "%p cpu mapped @%llu\n", bo, stats_time_get(ws));
>      }
>
> +    bo->stats.num_cpu_ops++;
> +    bo->stats.last_cpu_time = stats_time_get(ws);
> +
>      return radeon_bo_do_map(bo);
>  }
>
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> index 5536bc1..651694b 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
> @@ -44,6 +44,20 @@ struct radeon_bo_desc {
>      unsigned initial_domains;
>  };
>
> +struct radeon_bo_stats {
> +    uint64_t num_reads;
> +    uint64_t num_writes;
> +    uint64_t num_cpu_ops;
> +
> +    /* Milliseconds in a monotonic clock */
> +    uint64_t last_read_time;
> +    uint64_t last_write_time;
> +    uint64_t last_cpu_time;
> +
> +    /* Depth, MSAA, etc. */
> +    bool high_prio;
> +};
> +
>  struct radeon_bo {
>      struct pb_buffer base;
>
> @@ -67,6 +81,8 @@ struct radeon_bo {
>
>      boolean flinked;
>      uint32_t flink;
> +
> +    struct radeon_bo_stats stats;
>  };
>
>  struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws);
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> index 845603d..5c1b756 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> @@ -353,13 +353,7 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
>      if (added_domains & RADEON_DOMAIN_VRAM)
>          cs->csc->used_vram += bo->base.size;
>
> -    if (ws->bo_stats_file) {
> -        if (usage & RADEON_USAGE_WRITE) {
> -            fprintf(ws->bo_stats_file, "%p write @%llu\n", bo, stats_time_get(ws));
> -        } else {
> -            fprintf(ws->bo_stats_file, "%p read @%llu\n", bo, stats_time_get(ws));
> -        }
> -    }
> +    ws->base.update_bo_stats((struct radeon_winsys *) ws, buf, usage);
>
>      return index;
>  }
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index bb17a21..44b5e21 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -564,6 +564,30 @@ static void radeon_enable_bo_stats(struct radeon_winsys *rws)
>              stats_time_get(ws));
>  }
>
> +static void radeon_update_bo_stats(struct radeon_winsys *rws,
> +                                   struct radeon_winsys_cs_handle *buf,
> +                                   enum radeon_bo_usage usage)
> +{
> +    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
> +    struct radeon_bo *bo = (struct radeon_bo*)buf;
> +
> +    if (ws->bo_stats_file) {
> +        if (usage & RADEON_USAGE_WRITE) {
> +            fprintf(ws->bo_stats_file, "%p write @%llu\n", bo, stats_time_get(ws));
> +        } else {
> +            fprintf(ws->bo_stats_file, "%p read @%llu\n", bo, stats_time_get(ws));
> +        }
> +    }
> +
> +    if (usage & RADEON_USAGE_WRITE) {
> +        bo->stats.num_writes++;
> +        bo->stats.last_write_time = stats_time_get(ws);
> +    } else {
> +        bo->stats.num_reads++;
> +        bo->stats.last_read_time = stats_time_get(ws);
> +    }
> +}
> +
>  static unsigned hash_fd(void *key)
>  {
>      int fd = pointer_to_intptr(key);
> @@ -701,6 +725,7 @@ PUBLIC struct radeon_winsys *radeon_drm_winsys_create(int fd)
>      ws->base.surface_best = radeon_drm_winsys_surface_best;
>      ws->base.query_value = radeon_query_value;
>      ws->base.enable_bo_stats = radeon_enable_bo_stats;
> +    ws->base.update_bo_stats = radeon_update_bo_stats;
>
>      radeon_bomgr_init_functions(ws);
>      radeon_drm_cs_init_functions(ws);
> diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> index bee22f8..0e559e1 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
> @@ -541,6 +541,17 @@ struct radeon_winsys {
>       * Enable bo statistics logging.
>       */
>      void (*enable_bo_stats)(struct radeon_winsys *ws);
> +
> +    /**
> +     * Update bo statistics.
> +     * The buf and usage params match those in cs_add_reloc.
> +     *
> +     * \param buf A winsys buffer to validate.
> +     * \param usage   Whether the buffer is used for read and/or write.
> +     */
> +    void (*update_bo_stats)(struct radeon_winsys *rws,
> +                            struct radeon_winsys_cs_handle *buf,
> +                            enum radeon_bo_usage usage);
>  };
>
>  /**
> --
> 1.8.3.1
>


More information about the mesa-dev mailing list