[Mesa-dev] [PATCH v2 56/64] radeonsi: decompress resident textures/images before graphics/compute

Tue Jun 6 23:46:47 UTC 2017

On Wed, Jun 7, 2017 at 1:27 AM, Marek Olšák <maraeo at gmail.com> wrote:
> On Tue, May 30, 2017 at 10:36 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>> Similar to the existing decompression code path except that it
>> loops over the list of resident textures/images.
>>
>> v2: - store pipe_sampler_view instead of si_sampler_view
>>
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>> ---
>>  src/gallium/drivers/radeonsi/si_blit.c        | 77 +++++++++++++++++++++++++--
>>  src/gallium/drivers/radeonsi/si_descriptors.c | 52 ++++++++++++++++++
>>  src/gallium/drivers/radeonsi/si_pipe.h        |  3 ++
>>  3 files changed, 129 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
>> index 343ca35736..a47f43958c 100644
>> --- a/src/gallium/drivers/radeonsi/si_blit.c
>> +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> @@ -22,6 +22,7 @@
>>   */
>>
>>  #include "si_pipe.h"
>> +#include "si_compute.h"
>>  #include "util/u_format.h"
>>  #include "util/u_surface.h"
>>
>> @@ -690,9 +691,6 @@ static void si_decompress_textures(struct si_context *sctx, unsigned shader_mask
>>  {
>>         unsigned compressed_colortex_counter, mask;
>>
>> -       if (sctx->blitter->running)
>> -               return;
>> -
>
> You can keep this.
>
>>         /* Update the compressed_colortex_mask if necessary. */
>>         compressed_colortex_counter = p_atomic_read(&sctx->screen->b.compressed_colortex_counter);
>>         if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) {
>> @@ -719,14 +717,87 @@ static void si_decompress_textures(struct si_context *sctx, unsigned shader_mask
>>         si_check_render_feedback(sctx);
>>  }
>>
>> +static void si_decompress_resident_textures(struct si_context *sctx)
>> +{
>> +       unsigned num_resident_tex_handles;
>> +       unsigned i;
>> +
>> +       num_resident_tex_handles = sctx->resident_tex_handles.size /
>> +                                  sizeof(struct si_texture_handle *);
>> +
>> +       for (i = 0; i < num_resident_tex_handles; i++) {
>> +               struct si_texture_handle *tex_handle =
>> +                       *util_dynarray_element(&sctx->resident_tex_handles,
>> +                                              struct si_texture_handle *, i);
>> +               struct pipe_sampler_view *view = tex_handle->view;
>> +               struct si_sampler_view *sview = (struct si_sampler_view *)view;
>> +               struct r600_texture *tex;
>> +
>> +               assert(view);
>> +               tex = (struct r600_texture *)view->texture;
>> +
>> +               if (view->texture->target == PIPE_BUFFER)
>> +                       continue;
>> +
>> +               if (tex_handle->compressed_colortex)
>> +                       si_decompress_color_texture(sctx, tex, view->u.tex.first_level,
>> +                                                   view->u.tex.last_level);
>> +
>> +               if (tex_handle->depth_texture)
>> +                       si_flush_depth_texture(sctx, tex,
>> +                               sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
>> +                               view->u.tex.first_level, view->u.tex.last_level,
>> +                               0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
>> +       }
>> +}
>> +
>> +static void si_decompress_resident_images(struct si_context *sctx)
>> +{
>> +       unsigned num_resident_img_handles;
>> +       unsigned i;
>> +
>> +       num_resident_img_handles = sctx->resident_img_handles.size /
>> +                                  sizeof(struct si_image_handle *);
>> +
>> +       for (i = 0; i < num_resident_img_handles; i++) {
>> +               struct si_image_handle *img_handle =
>> +                       *util_dynarray_element(&sctx->resident_img_handles,
>> +                                              struct si_image_handle *, i);
>> +               struct pipe_image_view *view = &img_handle->view;
>> +               struct r600_texture *tex;
>> +
>> +               assert(view);
>> +               tex = (struct r600_texture *)view->resource;
>> +
>> +               if (view->resource->target == PIPE_BUFFER)
>> +                       continue;
>> +
>> +               if (img_handle->compressed_colortex)
>> +                       si_decompress_color_texture(sctx, tex, view->u.tex.level,
>> +                                                   view->u.tex.level);
>> +       }
>> +}
>
> The loops in the two functions above will destroy CPU performance,
> because both functions are called for every draw call. We need to find
> a better way.
>
> I suggest that si_resident_handles_update_compressed_colortex should
> be rewritten to build 2 separate lists (for samplers and images)
> containing only references to bindless slots that need decompression,
> and make_resident functions should update the separate lists too. Then
> the two functions above can walk the separate lists instead of all
> resident handles. The effect will be that only those slots that need
> decompression will be checked. (ideally 0 or very few)

Just saw patches 58-59. They certainly improve it a bit - at least
apps not using bindless handles will not be affected.

I guess the main optimization of this can be done in follow-up
patches. My other comments below still apply here though.

>
>> +
>>  void si_decompress_graphics_textures(struct si_context *sctx)
>>  {
>> +       if (sctx->blitter->running)
>> +               return;
>> +
>>         si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
>> +
>> +       si_decompress_resident_textures(sctx);
>> +       si_decompress_resident_images(sctx);
>
> These changes...
>
>>  }
>>
>>  void si_decompress_compute_textures(struct si_context *sctx)
>>  {
>> +       if (sctx->blitter->running)
>> +               return;
>> +
>>         si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
>> +
>> +       si_decompress_resident_textures(sctx);
>> +       si_decompress_resident_images(sctx);
>
> ... and these changes can be moved into si_decompress_textures.
>

Marek