[Mesa-dev] [PATCH v2 56/64] radeonsi: decompress resident textures/images before graphics/compute
Marek Olšák
maraeo at gmail.com
Tue Jun 6 23:27:05 UTC 2017
On Tue, May 30, 2017 at 10:36 PM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> Similar to the existing decompression code path except that it
> loops over the list of resident textures/images.
>
> v2: - store pipe_sampler_view instead of si_sampler_view
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> src/gallium/drivers/radeonsi/si_blit.c | 77 +++++++++++++++++++++++++--
> src/gallium/drivers/radeonsi/si_descriptors.c | 52 ++++++++++++++++++
> src/gallium/drivers/radeonsi/si_pipe.h | 3 ++
> 3 files changed, 129 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 343ca35736..a47f43958c 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -22,6 +22,7 @@
> */
>
> #include "si_pipe.h"
> +#include "si_compute.h"
> #include "util/u_format.h"
> #include "util/u_surface.h"
>
> @@ -690,9 +691,6 @@ static void si_decompress_textures(struct si_context *sctx, unsigned shader_mask
> {
> unsigned compressed_colortex_counter, mask;
>
> - if (sctx->blitter->running)
> - return;
> -
You can keep this.
> /* Update the compressed_colortex_mask if necessary. */
> compressed_colortex_counter = p_atomic_read(&sctx->screen->b.compressed_colortex_counter);
> if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) {
> @@ -719,14 +717,87 @@ static void si_decompress_textures(struct si_context *sctx, unsigned shader_mask
> si_check_render_feedback(sctx);
> }
>
> +static void si_decompress_resident_textures(struct si_context *sctx)
> +{
> + unsigned num_resident_tex_handles;
> + unsigned i;
> +
> + num_resident_tex_handles = sctx->resident_tex_handles.size /
> + sizeof(struct si_texture_handle *);
> +
> + for (i = 0; i < num_resident_tex_handles; i++) {
> + struct si_texture_handle *tex_handle =
> + *util_dynarray_element(&sctx->resident_tex_handles,
> + struct si_texture_handle *, i);
> + struct pipe_sampler_view *view = tex_handle->view;
> + struct si_sampler_view *sview = (struct si_sampler_view *)view;
> + struct r600_texture *tex;
> +
> + assert(view);
> + tex = (struct r600_texture *)view->texture;
> +
> + if (view->texture->target == PIPE_BUFFER)
> + continue;
> +
> + if (tex_handle->compressed_colortex)
> + si_decompress_color_texture(sctx, tex, view->u.tex.first_level,
> + view->u.tex.last_level);
> +
> + if (tex_handle->depth_texture)
> + si_flush_depth_texture(sctx, tex,
> + sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
> + view->u.tex.first_level, view->u.tex.last_level,
> + 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
> + }
> +}
> +
> +static void si_decompress_resident_images(struct si_context *sctx)
> +{
> + unsigned num_resident_img_handles;
> + unsigned i;
> +
> + num_resident_img_handles = sctx->resident_img_handles.size /
> + sizeof(struct si_image_handle *);
> +
> + for (i = 0; i < num_resident_img_handles; i++) {
> + struct si_image_handle *img_handle =
> + *util_dynarray_element(&sctx->resident_img_handles,
> + struct si_image_handle *, i);
> + struct pipe_image_view *view = &img_handle->view;
> + struct r600_texture *tex;
> +
> + assert(view);
> + tex = (struct r600_texture *)view->resource;
> +
> + if (view->resource->target == PIPE_BUFFER)
> + continue;
> +
> + if (img_handle->compressed_colortex)
> + si_decompress_color_texture(sctx, tex, view->u.tex.level,
> + view->u.tex.level);
> + }
> +}
The loops in the two functions above will destroy CPU performance,
because both functions are called for every draw call. We need to find
a better way.
I suggest that si_resident_handles_update_compressed_colortex should
be rewritten to build 2 separate lists (for samplers and images)
containing only references to bindless slots that need decompression,
and the make_resident functions should update the separate lists too. Then
the two functions above can walk the separate lists instead of all
resident handles. The effect will be that only those slots that need
decompression will be checked (ideally 0 or very few).
> +
> void si_decompress_graphics_textures(struct si_context *sctx)
> {
> + if (sctx->blitter->running)
> + return;
> +
> si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
> +
> + si_decompress_resident_textures(sctx);
> + si_decompress_resident_images(sctx);
These changes...
> }
>
> void si_decompress_compute_textures(struct si_context *sctx)
> {
> + if (sctx->blitter->running)
> + return;
> +
> si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> +
> + si_decompress_resident_textures(sctx);
> + si_decompress_resident_images(sctx);
... and these changes can be moved into si_decompress_textures.
Marek
More information about the mesa-dev
mailing list