[Mesa-dev] [PATCH 25/28] i965/blorp: Add support for averaging resolves to the NIR path

Pohjolainen, Topi topi.pohjolainen at intel.com
Fri May 13 06:08:22 UTC 2016


On Tue, May 10, 2016 at 04:16:45PM -0700, Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 162 ++++++++++++++++++++++++---
>  1 file changed, 144 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index 7b01da8..83cdac5 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -840,6 +840,133 @@ blorp_nir_decode_msaa(nir_builder *b, nir_ssa_def *pos,
>  }
>  
>  /**
> + * Count the number of trailing 1 bits in the given value.  For example:
> + *
> + * count_trailing_one_bits(0) == 0
> + * count_trailing_one_bits(7) == 3
> + * count_trailing_one_bits(11) == 2
> + */
> +static inline int count_trailing_one_bits(unsigned value)
> +{
> +#ifdef HAVE___BUILTIN_CTZ
> +   return __builtin_ctz(~value);
> +#else
> +   return _mesa_bitcount(value & ~(value + 1));
> +#endif
> +}
> +
> +static nir_ssa_def *
> +blorp_nir_manual_blend_average(nir_builder *b, nir_ssa_def *pos,
> +                               unsigned tex_samples,
> +                               enum intel_msaa_layout tex_layout,
> +                               enum brw_reg_type dst_type)
> +{
> +   /* If non-null, this is the outer-most if statement */
> +   nir_if *outer_if = NULL;
> +
> +   nir_variable *color =
> +      nir_local_variable_create(b->impl, glsl_vec4_type(), "color");
> +
> +   nir_ssa_def *mcs = NULL;
> +   if (tex_layout == INTEL_MSAA_LAYOUT_CMS)
> +      mcs = blorp_nir_txf_ms_mcs(b, pos);
> +
> +   /* We add together samples using a binary tree structure, e.g. for 4x MSAA:
> +    *
> +    *   result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4
> +    *
> +    * This ensures that when all samples have the same value, no numerical
> +    * precision is lost, since each addition operation always adds two equal
> +    * values, and summing two equal floating point values does not lose
> +    * precision.
> +    *
> +    * We perform this computation by treating the texture_data array as a
> +    * stack and performing the following operations:
> +    *
> +    * - push sample 0 onto stack
> +    * - push sample 1 onto stack
> +    * - add top two stack entries
> +    * - push sample 2 onto stack
> +    * - push sample 3 onto stack
> +    * - add top two stack entries
> +    * - add top two stack entries
> +    * - divide top stack entry by 4
> +    *
> +    * Note that after pushing sample i onto the stack, the number of add
> +    * operations we do is equal to the number of trailing 1 bits in i.  This
> +    * works provided the total number of samples is a power of two, which it
> +    * always is for i965.
> +    *
> +    * For integer formats, we replace the add operations with average
> +    * operations and skip the final division.
> +    */
> +   nir_ssa_def *texture_data[4];
> +   unsigned stack_depth = 0;
> +   for (unsigned i = 0; i < tex_samples; ++i) {
> +      assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
> +
> +      /* Push sample i onto the stack */
> +      assert(stack_depth < ARRAY_SIZE(texture_data));
> +
> +      nir_ssa_def *ms_pos = nir_vec3(b, nir_channel(b, pos, 0),
> +                                        nir_channel(b, pos, 1),
> +                                        nir_imm_int(b, i));
> +      texture_data[stack_depth++] = blorp_nir_txf_ms(b, ms_pos, mcs, dst_type);
> +
> +      if (i == 0 && tex_layout == INTEL_MSAA_LAYOUT_CMS) {
> +         /* The Ivy Bridge PRM, Vol4 Part1 p27 (Multisample Control Surface)
> +          * suggests an optimization:
> +          *
> +          *     "A simple optimization with probable large return in
> +          *     performance is to compare the MCS value to zero (indicating
> +          *     all samples are on sample slice 0), and sample only from
> +          *     sample slice 0 using ld2dss if MCS is zero."
> +          *
> +          * Note that in the case where the MCS value is zero, sampling from
> +          * sample slice 0 using ld2dss and sampling from sample 0 using
> +          * ld2dms are equivalent (since all samples are on sample slice 0).
> +          * Since we have already sampled from sample 0, all we need to do is
> +          * skip the remaining fetches and averaging if MCS is zero.
> +          */
> +         nir_ssa_def *mcs_zero =
> +            nir_ieq(b, nir_channel(b, mcs, 0), nir_imm_int(b, 0));
> +         nir_if *if_stmt = nir_if_create(b->shader);
> +         if_stmt->condition = nir_src_for_ssa(mcs_zero);
> +         nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
> +
> +         b->cursor = nir_after_cf_list(&if_stmt->then_list);
> +         nir_store_var(b, color, texture_data[0], 0xf);
> +
> +         b->cursor = nir_after_cf_list(&if_stmt->else_list);
> +         outer_if = if_stmt;
> +      }
> +
> +      for (int j = 0; j < count_trailing_one_bits(i); j++) {
> +         assert(stack_depth >= 2);
> +         --stack_depth;
> +
> +         assert(dst_type == BRW_REGISTER_TYPE_F);
> +         texture_data[stack_depth - 1] =
> +            nir_fadd(b, texture_data[stack_depth - 1],
> +                        texture_data[stack_depth]);
> +      }
> +   }
> +
> +   /* We should have just 1 sample on the stack now. */
> +   assert(stack_depth == 1);
> +
> +   texture_data[0] = nir_fmul(b, texture_data[0],
> +                              nir_imm_float(b, 1.0 / tex_samples));
> +
> +   nir_store_var(b, color, texture_data[0], 0xf);
> +
> +   if (outer_if)
> +      b->cursor = nir_after_cf_node(&outer_if->cf_node);
> +
> +   return nir_load_var(b, color);
> +}
> +
> +/**
>   * Generator for WM programs used in BLORP blits.
>   *
>   * The bulk of the work done by the WM program is to wrap and unwrap the
> @@ -1095,7 +1222,23 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
>      * irrelevant, because we are going to fetch all samples.
>      */
>     if (key->blend && !key->blit_scaled) {
> -      goto fail;
> +      if (brw->gen == 6) {

The original has this short comment (with the typo fixed, s/an/can/):

            /* Gen6 hardware can automatically blend using the SAMPLE message */

Anyway, looks good:

Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>

> +         /* When looking up samples in an MSAA texture using the SAMPLE
> +          * message, Gen6 requires the texture coordinates to be odd integers
> +          * (so that they correspond to the center of a 2x2 block representing
> +          * the four samples that make up a pixel).  So we need to multiply
> +          * our X and Y coordinates each by 2 and then add 1.
> +          */
> +         src_pos = nir_ishl(&b, src_pos, nir_imm_int(&b, 1));
> +         src_pos = nir_iadd(&b, src_pos, nir_imm_int(&b, 1));
> +         src_pos = nir_i2f(&b, nir_channels(&b, src_pos, 0x3));
> +         color = blorp_nir_tex(&b, src_pos, key->texture_data_type);
> +      } else {
> +         /* Gen7+ hardware doesn't automatically blend. */
> +         color = blorp_nir_manual_blend_average(&b, src_pos, key->src_samples,
> +                                                key->src_layout,
> +                                                key->texture_data_type);
> +      }
>     } else if (key->blend && key->blit_scaled) {
>        goto fail;
>     } else {
> @@ -1991,23 +2134,6 @@ brw_blorp_blit_program::clamp_tex_coords(struct brw_reg regX,
>  
>  
>  
> -/**
> - * Count the number of trailing 1 bits in the given value.  For example:
> - *
> - * count_trailing_one_bits(0) == 0
> - * count_trailing_one_bits(7) == 3
> - * count_trailing_one_bits(11) == 2
> - */
> -static inline int count_trailing_one_bits(unsigned value)
> -{
> -#ifdef HAVE___BUILTIN_CTZ
> -   return __builtin_ctz(~value);
> -#else
> -   return _mesa_bitcount(value & ~(value + 1));
> -#endif
> -}
> -
> -
>  void
>  brw_blorp_blit_program::manual_blend_average(unsigned num_samples)
>  {
> -- 
> 2.5.0.400.gff86faf
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list