[Mesa-dev] [PATCH 18/26] i965: Track fast clear color in level/layer granularity

Pohjolainen, Topi topi.pohjolainen at gmail.com
Mon Oct 31 09:47:50 UTC 2016


Let's drop this patch for now. I added a patch restricting fast clear
to be used only for the first slice. Surface state can record only one RGBA
tuple, and hence it could only represent multiple slices if they all agreed on
the clear color value. This in turn is rather complex to track and is left for
the future, when it is known to make a significant enough difference. Looking
at the benchmarks on SKL (which can still use compression for slices other
than the first), the perf improvement appears to come from being able to fast
clear slice 0 of multi-lod/layered surfaces. On BDW I'm seeing a similar
improvement, which backs this theory.

On Tue, Oct 11, 2016 at 10:26:50PM +0300, Topi Pohjolainen wrote:
> Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c            | 29 ++++++++++++---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  3 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c    | 28 ++++++++-------
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h    | 45 ++++++++++++------------
>  4 files changed, 65 insertions(+), 40 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 677886a..8782424 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -185,7 +185,8 @@ blorp_surf_for_miptree(struct brw_context *brw,
>        /* We only really need a clear color if we also have an auxiliary
>         * surface.  Without one, it does nothing.
>         */
> -      surf->clear_color = intel_miptree_get_isl_clear_color(brw, mt);
> +      surf->clear_color =
> +         intel_miptree_get_isl_clear_color(brw, mt, *level, layer);
>  
>        surf->aux_surf = aux_surf;
>        surf->aux_addr = (struct blorp_address) {
> @@ -750,6 +751,25 @@ set_write_disables(const struct intel_renderbuffer *irb,
>  }
>  
>  static bool
> +set_slice_fast_clear_color(struct brw_context *brw,
> +                           struct intel_mipmap_level *level,
> +                           unsigned first_layer, unsigned num_layers,
> +                           const union gl_color_union *color)
> +{
> +   bool updated = false;
> +
> +   assert(first_layer == 0 && num_layers == 1);
> +
> +   for (unsigned i = 0; i < num_layers; ++i) {
> +      updated |= brw_meta_set_fast_clear_color(
> +                    brw, &level->slice[first_layer + i].gen9_fast_clear_color,
> +                    color);
> +   }
> +
> +   return updated;
> +}
> +
> +static bool
>  do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
>                        struct gl_renderbuffer *rb, unsigned buf,
>                        bool partial_clear, bool encode_srgb)
> @@ -788,6 +808,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
>  
>     const unsigned layer = intel_miptree_physical_to_logical_layer(
>                               irb->mt, irb->mt_layer);
> +   const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
>     const bool is_lossless_compressed = intel_miptree_is_lossless_compressed(
>                                            brw, irb->mt);
>     const enum intel_fast_clear_state fast_clear_state =
> @@ -802,8 +823,9 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
>         * programmed in SURFACE_STATE by later rendering and resolve
>         * operations.
>         */
> -      const bool color_updated = brw_meta_set_fast_clear_color(
> -                                    brw, &irb->mt->gen9_fast_clear_color,
> +      const bool color_updated = set_slice_fast_clear_color(
> +                                    brw, &irb->mt->level[irb->mt_level],
> +                                    irb->mt_layer, num_layers,
>                                      &override_color);
>  
>        /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, and the
> @@ -846,7 +868,6 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
>     struct blorp_surf surf;
>     unsigned level = irb->mt_level;
>     blorp_surf_for_miptree(brw, &surf, irb->mt, true, &level, layer, isl_tmp);
> -   const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
>  
>     if (can_fast_clear) {
>        DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 249f8f5..7ee9486 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -150,7 +150,8 @@ brw_emit_surface_state(struct brw_context *brw,
>        /* We only really need a clear color if we also have an auxiliary
>         * surfacae.  Without one, it does nothing.
>         */
> -      clear_color = intel_miptree_get_isl_clear_color(brw, mt);
> +      clear_color = intel_miptree_get_isl_clear_color(
> +                       brw, mt, view.base_level, view.base_array_layer);
>     }
>  
>     uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index f3d4cbe..0fed0ee 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -3442,8 +3442,10 @@ intel_miptree_get_aux_isl_surf(struct brw_context *brw,
>  
>  union isl_color_value
>  intel_miptree_get_isl_clear_color(struct brw_context *brw,
> -                                  const struct intel_mipmap_tree *mt)
> +                                  const struct intel_mipmap_tree *mt,
> +                                  unsigned level, unsigned layer)
>  {
> +   const struct intel_mipmap_slice *slice = &mt->level[level].slice[layer];
>     union isl_color_value clear_color;
>  
>     if (_mesa_get_format_base_format(mt->format) == GL_DEPTH_COMPONENT) {
> @@ -3452,20 +3454,20 @@ intel_miptree_get_isl_clear_color(struct brw_context *brw,
>        clear_color.i32[2] = 0;
>        clear_color.i32[3] = 0;
>     } else if (brw->gen >= 9) {
> -      clear_color.i32[0] = mt->gen9_fast_clear_color.i[0];
> -      clear_color.i32[1] = mt->gen9_fast_clear_color.i[1];
> -      clear_color.i32[2] = mt->gen9_fast_clear_color.i[2];
> -      clear_color.i32[3] = mt->gen9_fast_clear_color.i[3];
> +      clear_color.i32[0] = slice->gen9_fast_clear_color.i[0];
> +      clear_color.i32[1] = slice->gen9_fast_clear_color.i[1];
> +      clear_color.i32[2] = slice->gen9_fast_clear_color.i[2];
> +      clear_color.i32[3] = slice->gen9_fast_clear_color.i[3];
>     } else if (_mesa_is_format_integer(mt->format)) {
> -      clear_color.i32[0] = (mt->fast_clear_color_value & (1u << 31)) != 0;
> -      clear_color.i32[1] = (mt->fast_clear_color_value & (1u << 30)) != 0;
> -      clear_color.i32[2] = (mt->fast_clear_color_value & (1u << 29)) != 0;
> -      clear_color.i32[3] = (mt->fast_clear_color_value & (1u << 28)) != 0;
> +      clear_color.i32[0] = (slice->fast_clear_color_value & (1u << 31)) != 0;
> +      clear_color.i32[1] = (slice->fast_clear_color_value & (1u << 30)) != 0;
> +      clear_color.i32[2] = (slice->fast_clear_color_value & (1u << 29)) != 0;
> +      clear_color.i32[3] = (slice->fast_clear_color_value & (1u << 28)) != 0;
>     } else {
> -      clear_color.f32[0] = (mt->fast_clear_color_value & (1u << 31)) != 0;
> -      clear_color.f32[1] = (mt->fast_clear_color_value & (1u << 30)) != 0;
> -      clear_color.f32[2] = (mt->fast_clear_color_value & (1u << 29)) != 0;
> -      clear_color.f32[3] = (mt->fast_clear_color_value & (1u << 28)) != 0;
> +      clear_color.f32[0] = (slice->fast_clear_color_value & (1u << 31)) != 0;
> +      clear_color.f32[1] = (slice->fast_clear_color_value & (1u << 30)) != 0;
> +      clear_color.f32[2] = (slice->fast_clear_color_value & (1u << 29)) != 0;
> +      clear_color.f32[3] = (slice->fast_clear_color_value & (1u << 28)) != 0;
>     }
>  
>     return clear_color;
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index db3ccb0..079fb4a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -152,6 +152,27 @@ struct intel_mipmap_level
>        /** \} */
>  
>        /**
> +       * The SURFACE_STATE bits associated with the last fast color clear to
> +       * this color mipmap tree, if any.
> +       *
> +       * Prior to GEN9 there is a single bit for RGBA clear values which gives
> +       * you the option of 2^4 clear colors. Each bit determines if the color
> +       * channel is fully saturated or unsaturated (Cherryview does add a 32b
> +       * value per channel, but it is globally applied instead of being part
> +       * of the render surface state). Starting with GEN9, the surface state
> +       * accepts a 32b value for each color channel.
> +       *
> +       * @see RENDER_SURFACE_STATE.RedClearColor
> +       * @see RENDER_SURFACE_STATE.GreenClearColor
> +       * @see RENDER_SURFACE_STATE.BlueClearColor
> +       * @see RENDER_SURFACE_STATE.AlphaClearColor
> +       */
> +      union {
> +         uint32_t fast_clear_color_value;
> +         union gl_color_union gen9_fast_clear_color;
> +      };
> +
> +      /**
>         * Mapping information. Persistent for the duration of
>         * intel_miptree_map/unmap on this slice.
>         */
> @@ -584,27 +605,6 @@ struct intel_mipmap_tree
>     struct intel_mipmap_tree *plane[2];
>  
>     /**
> -    * The SURFACE_STATE bits associated with the last fast color clear to this
> -    * color mipmap tree, if any.
> -    *
> -    * Prior to GEN9 there is a single bit for RGBA clear values which gives you
> -    * the option of 2^4 clear colors. Each bit determines if the color channel
> -    * is fully saturated or unsaturated (Cherryview does add a 32b value per
> -    * channel, but it is globally applied instead of being part of the render
> -    * surface state). Starting with GEN9, the surface state accepts a 32b value
> -    * for each color channel.
> -    *
> -    * @see RENDER_SURFACE_STATE.RedClearColor
> -    * @see RENDER_SURFACE_STATE.GreenClearColor
> -    * @see RENDER_SURFACE_STATE.BlueClearColor
> -    * @see RENDER_SURFACE_STATE.AlphaClearColor
> -    */
> -   union {
> -      uint32_t fast_clear_color_value;
> -      union gl_color_union gen9_fast_clear_color;
> -   };
> -
> -   /**
>      * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS
>      * buffer. This is useful for sharing the miptree bo with an external client
>      * that doesn't understand auxiliary buffers.
> @@ -781,7 +781,8 @@ intel_miptree_get_aux_isl_surf(struct brw_context *brw,
>  
>  union isl_color_value
>  intel_miptree_get_isl_clear_color(struct brw_context *brw,
> -                                  const struct intel_mipmap_tree *mt);
> +                                  const struct intel_mipmap_tree *mt,
> +                                  unsigned level, unsigned layer);
>  
>  void
>  intel_get_image_dims(struct gl_texture_image *image,
> -- 
> 2.5.5
> 


More information about the mesa-dev mailing list