[Mesa-dev] [PATCH 1/2] i965: Fix render-to-texture in non-FinishRenderTexture cases.

Kenneth Graunke kenneth at whitecape.org
Wed Mar 5 16:37:17 PST 2014


On 03/05/2014 04:17 PM, Eric Anholt wrote:
> We've had several problems now with FinishRenderTexture not getting called
> enough, and we're ready to just give up on it ever doing what we need.  In
> particular, an upcoming Steam title had rendering bugs that could be fixed
> by always_flush_cache=true.
> 
> Instead of hoping Mesa core can figure out when we need to flush our
> caches, just track what BOs we've rendered to in a set, and when we render
> from a BO in that set, emit a flush and clear the set.
> 
> There's some overhead to keeping this set, but most of that is just
> hashing the pointer -- it turns out our set never even gets very large,
> because cache flushes are so common (even on cairo-gl).
> 
> No statistically significant performance difference in cairo-gl (n=100),
> despite spending ~.5% CPU in these set operations.
> 
> v1: (Original patch by Eric Anholt.)
> v2: (Changes by Ken Graunke.)
>   - Rebase forward from May 7th 2013 -> March 4th 2014.
>   - Drop the FinishRenderTexture hook entirely; after rebasing the
>     patch, the hook was just an empty function.
>   - Move the brw_render_cache_set_clear() call from
>     intel_batchbuffer_emit_flush() to brw_emit_pipe_control_flush().
>     In theory, this could catch more cases where we've flushed.
>   - Consider stencil as a possible texturing source.
> v3: (changes by anholt):
>   - Move set_clear() back to emit_mi_flush() -- it means we can drop
>     more forced flushes from the code.  In the previous location, it
>     wouldn't have been called when we wanted pre-gen6.
>   - Move the set clear from batch init to reset -- it should be empty at
>     the start of every batch, since the kernel handled any inter-batch
>     flush for us.
> 
> Signed-off-by: Eric Anholt <eric at anholt.net>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_context.c       |  4 +-
>  src/mesa/drivers/dri/i965/brw_context.h       |  7 +++
>  src/mesa/drivers/dri/i965/brw_draw.c          | 30 +++++++++---
>  src/mesa/drivers/dri/i965/brw_misc_state.c    |  5 ++
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c |  4 ++
>  src/mesa/drivers/dri/i965/intel_fbo.c         | 66 +++++++++++++++++++--------
>  src/mesa/drivers/dri/i965/intel_fbo.h         |  4 ++
>  src/mesa/main/set.c                           |  6 +++
>  8 files changed, 99 insertions(+), 27 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 1441b46..026b3aa 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -698,6 +698,8 @@ brwCreateContext(gl_api api,
>     /* Reinitialize the context point state.  It depends on ctx->Const values. */
>     _mesa_init_point(ctx);
>  
> +   intel_fbo_init(brw);
> +
>     intel_batchbuffer_init(brw);
>  
>     if (brw->gen >= 6) {
> @@ -721,8 +723,6 @@ brwCreateContext(gl_api api,
>  
>     intelInitExtensions(ctx);
>  
> -   intel_fbo_init(brw);
> -
>     brw_init_surface_formats(brw);
>  
>     if (brw->is_g4x || brw->gen >= 5) {
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index dbb30f2..734d273 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1030,6 +1030,13 @@ struct brw_context
>     drm_intel_context *hw_ctx;
>  
>     /**
> +    * Set of drm_intel_bo * that have been rendered to within this batchbuffer
> +    * and would need flushing before being used from another cache domain that
> +    * isn't coherent with it (i.e. the sampler).
> +    */
> +   struct set *render_cache;
> +
> +   /**
>      * Number of resets observed in the system at context creation.
>      *
>      * This is tracked in the context so that we can determine that another
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index bc887fe..11a0361 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -297,8 +297,8 @@ static void brw_merge_inputs( struct brw_context *brw,
>  /*
>   * \brief Resolve buffers before drawing.
>   *
> - * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
> - * enabled depth texture.
> + * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
> + * enabled depth texture, and flush the render cache for any dirty textures.
>   *
>   * (In the future, this will also perform MSAA resolves).
>   */
> @@ -314,9 +314,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>     if (depth_irb)
>        intel_renderbuffer_resolve_hiz(brw, depth_irb);
>  
> -   /* Resolve depth buffer of each enabled depth texture, and color buffer of
> -    * each fast-clear-enabled color texture.
> -    */
> +   /* Resolve depth buffer and render cache of each enabled texture. */
>     for (int i = 0; i < ctx->Const.MaxCombinedTextureImageUnits; i++) {
>        if (!ctx->Texture.Unit[i]._ReallyEnabled)
>  	 continue;
> @@ -325,6 +323,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>  	 continue;
>        intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
>        intel_miptree_resolve_color(brw, tex_obj->mt);
> +      brw_render_cache_set_check_flush(brw, tex_obj->mt->region->bo);
>     }
>  }
>  
> @@ -336,6 +335,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>   *
>   * If the color buffer is a multisample window system buffer, then
>   * mark that it needs a downsample.
> + *
> + * Also mark any render targets which will be textured as needing a render
> + * cache flush.
>   */
>  static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>  {
> @@ -345,6 +347,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>     struct intel_renderbuffer *front_irb = NULL;
>     struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
>     struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
> +   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
>     struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
>  
>     if (brw->is_front_buffer_rendering)
> @@ -354,8 +357,23 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>        front_irb->need_downsample = true;
>     if (back_irb)
>        back_irb->need_downsample = true;
> -   if (depth_irb && ctx->Depth.Mask)
> +   if (depth_irb && ctx->Depth.Mask) {
>        intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
> +      brw_render_cache_set_add_bo(brw, depth_irb->mt->region->bo);
> +   }
> +
> +   if (ctx->Extensions.ARB_stencil_texturing &&
> +       stencil_irb && ctx->Stencil._WriteEnabled) {
> +      brw_render_cache_set_add_bo(brw, stencil_irb->mt->region->bo);
> +   }
> +
> +   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
> +      struct intel_renderbuffer *irb =
> +         intel_renderbuffer(fb->_ColorDrawBuffers[i]);
> +
> +      if (irb)
> +         brw_render_cache_set_add_bo(brw, irb->mt->region->bo);
> +   }
>  }
>  
>  /* May fail if out of video memory for texture or vbo upload, or on
> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
> index b984d47..c8fb6f3 100644
> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> @@ -552,6 +552,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
>        height = stencil_irb->Base.Base.Height;
>     }
>  
> +   if (depth_mt)
> +      brw_render_cache_set_check_flush(brw, depth_mt->region->bo);
> +   if (stencil_mt)
> +      brw_render_cache_set_check_flush(brw, stencil_mt->region->bo);
> +
>     brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
>                                      depthbuffer_format, depth_surface_type,
>                                      stencil_mt, hiz, separate_stencil,
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 98759e2..fe95342 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -30,6 +30,7 @@
>  #include "intel_reg.h"
>  #include "intel_bufmgr.h"
>  #include "intel_buffers.h"
> +#include "intel_fbo.h"
>  #include "brw_context.h"
>  
>  static void
> @@ -88,6 +89,7 @@ intel_batchbuffer_reset(struct brw_context *brw)
>     brw->batch.last_bo = brw->batch.bo;
>  
>     intel_batchbuffer_clear_cache(brw);
> +   brw_render_cache_set_clear(brw);
>  
>     brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
>  					BATCH_SZ, 4096);
> @@ -696,6 +698,8 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
>        }
>        brw_emit_pipe_control_flush(brw, flags);
>     }
> +
> +   brw_render_cache_set_clear(brw);
>  }
>  
>  void
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
> index d11cdb6..0f9abac 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -36,6 +36,8 @@
>  #include "main/context.h"
>  #include "main/teximage.h"
>  #include "main/image.h"
> +#include "main/hash_table.h"
> +#include "main/set.h"
>  
>  #include "swrast/swrast.h"
>  #include "drivers/common/meta.h"
> @@ -600,24 +602,6 @@ intel_render_texture(struct gl_context * ctx,
>  }
>  
>  
> -/**
> - * Called by Mesa when rendering to a texture is done.
> - */
> -static void
> -intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb)
> -{
> -   struct brw_context *brw = brw_context(ctx);
> -
> -   DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format));
> -
> -   /* Since we've (probably) rendered to the texture and will (likely) use
> -    * it in the texture domain later on in this batchbuffer, flush the
> -    * batch.  Once again, we wish for a domain tracker in libdrm to cover
> -    * usage inside of a batchbuffer like GEM does in the kernel.
> -    */
> -   intel_batchbuffer_emit_mi_flush(brw);
> -}
> -
>  #define fbo_incomplete(fb, ...) do {                                          \
>        static GLuint msg_id = 0;                                               \
>        if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) {    \
> @@ -953,6 +937,49 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
>     intel_miptree_release(&new_mt);
>  }
>  
> +void
> +brw_render_cache_set_clear(struct brw_context *brw)
> +{
> +   struct set_entry *entry;
> +
> +   fprintf(stderr, "clear\n");
> +
> +   set_foreach(brw->render_cache, entry) {
> +      _mesa_set_remove(brw->render_cache, entry);
> +   }
> +}
> +
> +void
> +brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo)
> +{
> +   printf("    add bo %p\n", bo);
> +   _mesa_set_add(brw->render_cache, _mesa_hash_pointer(bo), bo);
> +}
> +
> +/**
> + * Emits an appropriate flush for a BO if it has been rendered to within the
> + * same batchbuffer as a read that's about to be emitted.
> + *
> + * The GPU has separate, incoherent caches for the render cache and the
> + * sampler cache, along with other caches.  Usually data in the different
> + * caches don't interact (e.g. we don't render to our driver-generated
> + * immediate constant data), but for render-to-texture in FBOs we definitely
> + * do.  When a batchbuffer is flushed, the kernel will ensure that everything
> + * necessary is flushed before another use of that BO, but for reuse from
> + * different caches within a batchbuffer, it's all our responsibility.
> + */
> +void
> +brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo)
> +{
> +   if (!_mesa_set_search(brw->render_cache, _mesa_hash_pointer(bo), bo)) {
> +      printf("noflush bo %p\n", bo);
> +      return;
> +   }
> +
> +   printf("  flush bo %p\n", bo);
> +   intel_batchbuffer_emit_mi_flush(brw);
> +}
> +
>  /**
>   * Do one-time context initializations related to GL_EXT_framebuffer_object.
>   * Hook in device driver functions.
> @@ -966,9 +993,10 @@ intel_fbo_init(struct brw_context *brw)
>     dd->MapRenderbuffer = intel_map_renderbuffer;
>     dd->UnmapRenderbuffer = intel_unmap_renderbuffer;
>     dd->RenderTexture = intel_render_texture;
> -   dd->FinishRenderTexture = intel_finish_render_texture;
>     dd->ValidateFramebuffer = intel_validate_framebuffer;
>     dd->BlitFramebuffer = intel_blit_framebuffer;
>     dd->EGLImageTargetRenderbufferStorage =
>        intel_image_target_renderbuffer_storage;
> +
> +   brw->render_cache = _mesa_set_create(brw, _mesa_key_pointer_equal);
>  }
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
> index b8db7e2..fa457e2 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.h
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.h
> @@ -237,6 +237,10 @@ void
>  intel_renderbuffer_upsample(struct brw_context *brw,
>                              struct intel_renderbuffer *irb);
>  
> +void brw_render_cache_set_clear(struct brw_context *brw);
> +void brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo);
> +void brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo);
> +
>  unsigned
>  intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
>  
> diff --git a/src/mesa/main/set.c b/src/mesa/main/set.c
> index dc3550c..bbae906 100644
> --- a/src/mesa/main/set.c
> +++ b/src/mesa/main/set.c
> @@ -199,6 +199,8 @@ set_rehash(struct set *ht, int new_size_index)
>     if (table == NULL)
>        return;
>  
> +   fprintf(stderr, "rehash %d\n", hash_sizes[ht->size_index].max_entries);
> +
>     old_ht = *ht;
>  
>     ht->table = table;
> @@ -237,11 +239,15 @@ _mesa_set_add(struct set *ht, uint32_t hash, const void *key)
>        set_rehash(ht, ht->size_index);
>     }
>  
> +   fprintf(stderr, "add %d %d\n", ht->entries, ht->deleted_entries);
>     hash_address = hash % ht->size;
>     do {
>        struct set_entry *entry = ht->table + hash_address;
>        uint32_t double_hash;
>  
> +      fprintf(stderr, "%d %d\n", entry_is_present(entry),
> +             entry_is_deleted(entry));
> +
>        if (!entry_is_present(entry)) {
>           if (entry_is_deleted(entry))
>              ht->deleted_entries--;
> 

Assuming you drop the debug prints, this is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

We should probably also send it to 10.1.1.  I can backport it (just
dropping the stencil hunk) once it lands.

--Ken

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140305/40c10935/attachment.pgp>


More information about the mesa-dev mailing list