[Mesa-dev] [PATCH 16/18] intel/blorp: Properly handle color compression in blorp_copy

Fri Nov 4 09:17:13 UTC 2016

On Fri, Oct 28, 2016 at 02:17:12AM -0700, Jason Ekstrand wrote:
> Previously, blorp copy operations were CCS-unaware so you had to perform
> resolves on the source and destination before performing the copy.  This
> commit makes blorp_copy capable of handling CCS-compressed images without
> any resolves.
> 
> Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
> ---
>  src/intel/blorp/blorp_blit.c | 173 ++++++++++++++++++++++++++++++++++++++++++-
>  src/intel/blorp/blorp_priv.h |   6 ++
>  2 files changed, 177 insertions(+), 2 deletions(-)
> 
> diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
> index 07bb181..598b7c9 100644
> --- a/src/intel/blorp/blorp_blit.c
> +++ b/src/intel/blorp/blorp_blit.c
> @@ -851,6 +851,66 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
>                        frac_y);
>  }
>  
> +/** Perform a color bit-cast operation
> + *
> + * For copy operations involving CCS, we may need to use different formats for
> + * the source and destination surfaces.  The two formats must both be UINT
> + * formats and must have the same size but may have different bit layouts.
> + * For instance, we may be copying from R8G8B8A8_UINT to R32_UINT or R32_UINT
> + * to R16G16_UINT.  This function generates code to shuffle bits around to get
> + * us from one to the other.
> + */
> +static nir_ssa_def *
> +bit_cast_color(struct nir_builder *b, nir_ssa_def *color,
> +               const struct brw_blorp_blit_prog_key *key)
> +{
> +   assert(key->texture_data_type == nir_type_uint);
> +
> +   if (key->dst_bpc > key->src_bpc) {
> +      nir_ssa_def *u = nir_ssa_undef(b, 1, 32);
> +      nir_ssa_def *dst_chan[2] = { u, u };
> +      unsigned shift = 0;
> +      unsigned dst_idx = 0;
> +      for (unsigned i = 0; i < 4; i++) {
> +         nir_ssa_def *shifted = nir_ishl(b, nir_channel(b, color, i),
> +                                            nir_imm_int(b, shift));
> +         if (shift == 0) {
> +            dst_chan[dst_idx] = shifted;
> +         } else {
> +            dst_chan[dst_idx] = nir_ior(b, dst_chan[dst_idx], shifted);
> +         }
> +
> +         shift += key->src_bpc;
> +         if (shift >= key->dst_bpc) {
> +            dst_idx++;
> +            shift = 0;
> +         }
> +      }

This iterates unconditionally over 4 source components regardless how many
there are in source, right? Lets assume we have dst == R32_UINT and
src == R16G16_UINT, don't we get the following where iterations with i = 2
and i = 3 try to access non-existing z and w components:

i = 0:
            nir_ssa_def *shifted = nir_ishl(b, nir_channel(b, color, 0),
                                            nir_imm_int(b, 0));
            dst_chan[0] = shifted;

i = 1:
            nir_ssa_def *shifted = nir_ishl(b, nir_channel(b, color, 1),
                                            nir_imm_int(b, 16));
            dst_chan[0] = nir_ior(b, dst_chan[0], shifted);

i = 2:
            nir_ssa_def *shifted = nir_ishl(b, nir_channel(b, color, 2),
                                            nir_imm_int(b, 0));
            dst_chan[1] = shifted;

i = 3:
            nir_ssa_def *shifted = nir_ishl(b, nir_channel(b, color, 3),
                                            nir_imm_int(b, 16));
            dst_chan[1] = nir_ior(b, dst_chan[1], shifted);