[Mesa-dev] [PATCH v2 12/12] radeonsi: Use CE for all descriptors.

Marek Olšák maraeo at gmail.com
Mon Apr 18 11:46:52 UTC 2016


On Sun, Apr 17, 2016 at 1:43 AM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> v2: Load previous list for new CS instead of re-emitting
>     all descriptors.
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 70 +++++++++++++++++++++++----
>  1 file changed, 60 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 8ca0253..e4f06e7 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -60,6 +60,7 @@
>  #include "si_shader.h"
>  #include "sid.h"
>
> +#include "util/u_math.h"
>  #include "util/u_memory.h"
>  #include "util/u_suballoc.h"
>  #include "util/u_upload_mgr.h"
> @@ -152,29 +153,78 @@ static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned s
>         return true;
>  }
>
> +static void si_reinitialize_ce_ram(struct si_context *sctx,
> +                            struct si_descriptors *desc)
> +{
> +       if (desc->buffer) {
> +               struct r600_resource *buffer = (struct r600_resource*)desc->buffer;
> +               unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
> +               uint64_t va = buffer->gpu_address + desc->buffer_offset;
> +               struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
> +
> +               if (!ib)
> +                       ib = sctx->ce_ib;
> +
> +               list_size = align(list_size, 32);
> +
> +               radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
> +               radeon_emit(ib, va);
> +               radeon_emit(ib, va >> 32);
> +               radeon_emit(ib, list_size / 4);
> +               radeon_emit(ib, desc->ce_offset);
> +
> +               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
> +                                   RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
> +       }
> +       desc->ce_ram_dirty = false;
> +}
>
>  static bool si_upload_descriptors(struct si_context *sctx,
>                                   struct si_descriptors *desc)
>  {
>         unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
> -       void *ptr;
>
>         if (!desc->dirty_mask)
>                 return true;
>
> -       u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
> -                      &desc->buffer_offset,
> -                      (struct pipe_resource**)&desc->buffer, &ptr);
> -       if (!desc->buffer)
> -               return false; /* skip the draw call */
> +       if (sctx->ce_ib) {
> +               uint32_t const* list = (uint32_t const*)desc->list;
>
> -       util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
> +               if (desc->ce_ram_dirty)
> +                       si_reinitialize_ce_ram(sctx, desc);
>
> -       radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
> -                             RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
> +               while(desc->dirty_mask) {
> +                       int begin, count;
> +                       u_bit_scan_consecutive_range64(&desc->dirty_mask, &begin,
> +                                                      &count);
>
> -       desc->dirty_mask = 0;
> +                       begin *= desc->element_dw_size;
> +                       count *= desc->element_dw_size;
> +
> +                       radeon_emit(sctx->ce_ib,
> +                                   PKT3(PKT3_WRITE_CONST_RAM, count, 0));
> +                       radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
> +                       radeon_emit_array(sctx->ce_ib, list + begin, count);
> +               }
> +
> +               if (!si_ce_upload(sctx, desc->ce_offset, list_size,
> +                                          &desc->buffer_offset, &desc->buffer))
> +                       return false;
> +       } else {
> +               void *ptr;
> +
> +               u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
> +                       &desc->buffer_offset,
> +                       (struct pipe_resource**)&desc->buffer, &ptr);
> +               if (!desc->buffer)
> +                       return false; /* skip the draw call */
> +
> +               util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
> +       }
> +       radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
> +                                 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);

For clarity, it would be better to call radeon_add_to_buffer_list in
si_ce_upload, because that is where an IB first uses the buffer. In any case:

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek


More information about the mesa-dev mailing list