[Mesa-dev] [PATCH] intel: Fix render-to-texture in non-FinishRenderTexture cases.

Kenneth Graunke kenneth at whitecape.org
Wed May 8 17:00:45 PDT 2013


On 05/07/2013 03:53 PM, Eric Anholt wrote:
> With EGL_KHR_gl_renderbuffer_iamge, we have the ability to render to
> renderbuffers that are also textures, so the core Mesa FinishRenderTexture
> hook doesn't get called.  That hook also wasn't called in various cases
> within the driver where we'd update texture contents using the render
> cache (like glCopyTexSubImage) that resulted in
> intel_batchbuffer_emit_mi_flush().
>
> To fix it, track a set of rendered-to BOs in our context, which is
> cleared at batch wrap or emit_mi_flush time, and do an emit_mi_flush if
> one of our textures is in that set.
>
> This change doesn't turn the other emit_mi_flushes (such as intel_blit.c
> operations) into render_cache_set operations yet, as that would increase the
> size of our set and we expect that those operations get immediately flushed
> anyway.
>
> No statistically significant performance difference in cairo-gl (n=53/54, slow
> turbo outliers removed), despite spending ~1% CPU in these set operations.

I noticed that you call intel_render_cache_set_check_flush() for 
0..BRW_MAX_TEX_UNITS.  Each of those calls computes the hash pointer and 
searches the cache, which isn't free...but if you ever find an entry, 
you clear the whole cache.

You might be able to speed this up slightly by maintaining a boolean "is 
the set empty?" flag.  Just flag it in add, and unflag it in clear.

The i965 parts of this are:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

I'll try and test it tomorrow.

I have no idea about the 8xx/9xx code.  I'll trust you on that.

>
> Fixes piglit EGL_KHR_gl_renderbuffer_image/renderbuffer-texture.
> ---
>   src/mesa/drivers/dri/i915/i830_texstate.c      |  3 ++
>   src/mesa/drivers/dri/i915/i915_texstate.c      |  3 ++
>   src/mesa/drivers/dri/i915/intel_tris.c         | 22 ++++++++++++
>   src/mesa/drivers/dri/i965/brw_draw.c           | 23 +++++++++---
>   src/mesa/drivers/dri/i965/brw_misc_state.c     |  5 +++
>   src/mesa/drivers/dri/intel/intel_batchbuffer.c |  5 +++
>   src/mesa/drivers/dri/intel/intel_context.h     |  7 ++++
>   src/mesa/drivers/dri/intel/intel_fbo.c         | 49 ++++++++++++++++++++++----
>   src/mesa/drivers/dri/intel/intel_fbo.h         |  6 ++++
>   9 files changed, 112 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c
> index f186fac..6b1dbf0 100644
> --- a/src/mesa/drivers/dri/i915/i830_texstate.c
> +++ b/src/mesa/drivers/dri/i915/i830_texstate.c
> @@ -33,6 +33,7 @@
>
>   #include "intel_mipmap_tree.h"
>   #include "intel_tex.h"
> +#include "intel_fbo.h"
>
>   #include "i830_context.h"
>   #include "i830_reg.h"
> @@ -128,6 +129,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
>      GLubyte border[4];
>      GLuint dst_x, dst_y;
>
> +   intel_render_cache_set_check_flush(intel, intelObj->mt->region->bo);
> +
>      memset(state, 0, sizeof(*state));
>
>      /*We need to refcount these. */
> diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
> index 43c802b..148da15 100644
> --- a/src/mesa/drivers/dri/i915/i915_texstate.c
> +++ b/src/mesa/drivers/dri/i915/i915_texstate.c
> @@ -33,6 +33,7 @@
>
>   #include "intel_mipmap_tree.h"
>   #include "intel_tex.h"
> +#include "intel_fbo.h"
>
>   #include "i915_context.h"
>   #include "i915_reg.h"
> @@ -151,6 +152,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
>      GLubyte border[4];
>      GLfloat maxlod;
>
> +   intel_render_cache_set_check_flush(intel, intelObj->mt->region->bo);
> +
>      memset(state, 0, sizeof(*state));
>
>      /*We need to refcount these. */
> diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
> index 7c60d84..1f27243 100644
> --- a/src/mesa/drivers/dri/i915/intel_tris.c
> +++ b/src/mesa/drivers/dri/i915/intel_tris.c
> @@ -52,6 +52,8 @@
>   #include "intel_batchbuffer.h"
>   #include "intel_buffers.h"
>   #include "intel_reg.h"
> +#include "intel_fbo.h"
> +#include "intel_mipmap_tree.h"
>   #include "i830_context.h"
>   #include "i830_reg.h"
>   #include "i915_context.h"
> @@ -61,6 +63,22 @@ static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
>                                    GLuint hwprim);
>
>   static void
> +mark_render_cache(struct intel_context *intel)
> +{
> +   struct gl_context *ctx = &intel->ctx;
> +   struct gl_framebuffer *fb = ctx->DrawBuffer;
> +   struct intel_renderbuffer *depth_irb =
> +      intel_get_renderbuffer(fb, BUFFER_DEPTH);
> +   struct intel_renderbuffer *color_irb =
> +      intel_renderbuffer(fb->_ColorDrawBuffers[0]);
> +
> +   if (color_irb)
> +      intel_render_cache_set_add_bo(intel, color_irb->mt->region->bo);
> +   if (depth_irb)
> +      intel_render_cache_set_add_bo(intel, depth_irb->mt->region->bo);
> +}
> +
> +static void
>   intel_flush_inline_primitive(struct intel_context *intel)
>   {
>      GLuint used = intel->batch.used - intel->prim.start_ptr;
> @@ -75,6 +93,8 @@ intel_flush_inline_primitive(struct intel_context *intel)
>      intel->batch.map[intel->prim.start_ptr] =
>         _3DPRIMITIVE | intel->prim.primitive | (used - 2);
>
> +   mark_render_cache(intel);
> +
>      goto finished;
>
>    do_discard:
> @@ -310,6 +330,8 @@ void intel_flush_prim(struct intel_context *intel)
>         ADVANCE_BATCH();
>      }
>
> +   mark_render_cache(intel);
> +
>      if (intel->always_flush_cache) {
>         intel_batchbuffer_emit_mi_flush(intel);
>      }
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index 8c37e0b..670d648 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -292,8 +292,8 @@ static void brw_merge_inputs( struct brw_context *brw,
>   /*
>    * \brief Resolve buffers before drawing.
>    *
> - * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
> - * enabled depth texture.
> + * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
> + * enabled depth texture, and flush the render cache for any dirty textures.
>    *
>    * (In the future, this will also perform MSAA resolves).
>    */
> @@ -310,7 +310,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>      if (depth_irb)
>         intel_renderbuffer_resolve_hiz(intel, depth_irb);
>
> -   /* Resolve depth buffer of each enabled depth texture. */
> +   /* Resolve depth buffer and render cache of each enabled texture. */
>      for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
>         if (!ctx->Texture.Unit[i]._ReallyEnabled)
>   	 continue;
> @@ -318,6 +318,8 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>         if (!tex_obj || !tex_obj->mt)
>   	 continue;
>         intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
> +
> +      intel_render_cache_set_check_flush(intel, tex_obj->mt->region->bo);
>      }
>   }
>
> @@ -329,6 +331,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>    *
>    * If the color buffer is a multisample window system buffer, then
>    * mark that it needs a downsample.
> + *
> + * For all render targets that might be textured (everything but stencil), mark
> + * that it needs a render cache flush if it will be textured.
>    */
>   static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>   {
> @@ -347,8 +352,18 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>         intel_renderbuffer_set_needs_downsample(front_irb);
>      if (back_irb)
>         intel_renderbuffer_set_needs_downsample(back_irb);
> -   if (depth_irb && ctx->Depth.Mask)
> +   if (depth_irb && ctx->Depth.Mask) {
>         intel_renderbuffer_set_needs_depth_resolve(depth_irb);
> +      intel_render_cache_set_add_bo(intel, depth_irb->mt->region->bo);
> +   }
> +
> +   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
> +      struct intel_renderbuffer *irb =
> +         intel_renderbuffer(fb->_ColorDrawBuffers[i]);
> +
> +      if (irb)
> +         intel_render_cache_set_add_bo(intel, irb->mt->region->bo);
> +   }
>   }
>
>   static int
> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
> index 6b61929..d7dafe2 100644
> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> @@ -641,6 +641,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
>         height = stencil_irb->Base.Base.Height;
>      }
>
> +   if (depth_mt)
> +      intel_render_cache_set_check_flush(intel, depth_mt->region->bo);
> +   if (stencil_mt)
> +      intel_render_cache_set_check_flush(intel, stencil_mt->region->bo);
> +
>      intel->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
>                                         depthbuffer_format, depth_surface_type,
>                                         stencil_mt, hiz, separate_stencil,
> diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> index 8c6524e..a3ec89a 100644
> --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> @@ -31,6 +31,7 @@
>   #include "intel_reg.h"
>   #include "intel_bufmgr.h"
>   #include "intel_buffers.h"
> +#include "intel_fbo.h"
>
>   static void
>   intel_batchbuffer_reset(struct intel_context *intel);
> @@ -273,6 +274,8 @@ _intel_batchbuffer_flush(struct intel_context *intel,
>       */
>      intel_batchbuffer_reset(intel);
>
> +   intel_render_cache_set_clear(intel);
> +
>      return ret;
>   }
>
> @@ -557,4 +560,6 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
>         OUT_BATCH(MI_FLUSH);
>         ADVANCE_BATCH();
>      }
> +
> +   intel_render_cache_set_clear(intel);
>   }
> diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
> index c0f07ff..3cf7f32 100644
> --- a/src/mesa/drivers/dri/intel/intel_context.h
> +++ b/src/mesa/drivers/dri/intel/intel_context.h
> @@ -249,6 +249,13 @@ struct intel_context
>
>      drm_intel_context *hw_ctx;
>
> +   /**
> +    * Set of drm_intel_bo * that have been rendered to within this batchbuffer
> +    * and would need flushing before being used from another cache domain that
> +    * isn't coherent with it (i.e. the sampler).
> +    */
> +   struct set *render_cache;
> +
>      struct intel_batchbuffer batch;
>
>      drm_intel_bo *first_post_swapbuffers_batch;
> diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
> index 2e36b0b..a621c2e 100644
> --- a/src/mesa/drivers/dri/intel/intel_fbo.c
> +++ b/src/mesa/drivers/dri/intel/intel_fbo.c
> @@ -36,6 +36,8 @@
>   #include "main/context.h"
>   #include "main/teximage.h"
>   #include "main/image.h"
> +#include "main/hash_table.h"
> +#include "main/set.h"
>
>   #include "swrast/swrast.h"
>   #include "drivers/common/meta.h"
> @@ -631,13 +633,6 @@ intel_finish_render_texture(struct gl_context * ctx,
>
>      if (irb)
>         irb->tex_image = NULL;
> -
> -   /* Since we've (probably) rendered to the texture and will (likely) use
> -    * it in the texture domain later on in this batchbuffer, flush the
> -    * batch.  Once again, we wish for a domain tracker in libdrm to cover
> -    * usage inside of a batchbuffer like GEM does in the kernel.
> -    */
> -   intel_batchbuffer_emit_mi_flush(intel);
>   }
>
>   #define fbo_incomplete(fb, ...) do {                                          \
> @@ -974,6 +969,44 @@ intel_renderbuffer_move_to_temp(struct intel_context *intel,
>      intel_miptree_release(&new_mt);
>   }
>
> +void
> +intel_render_cache_set_clear(struct intel_context *intel)
> +{
> +   struct set_entry *entry;
> +
> +   set_foreach(intel->render_cache, entry) {
> +      _mesa_set_remove(intel->render_cache, entry);
> +   }
> +}
> +
> +void
> +intel_render_cache_set_add_bo(struct intel_context *intel, drm_intel_bo *bo)
> +{
> +   _mesa_set_add(intel->render_cache, _mesa_hash_pointer(bo), bo);
> +}
> +
> +/**
> + * Emits an appropriate flush for a BO if it has been rendered to within the
> + * same batchbuffer as a read that's about to be emitted.
> + *
> + * The GPU has separate, incoherent caches for the render cache and the
> + * sampler cache, along with other caches.  Usually data in the different
> + * caches don't interact (e.g. we don't render to our driver-generated
> + * immediate constant data), but for render-to-texture in FBOs we definitely
> + * do.  When a batchbuffer is flushed, the kernel will ensure that everything
> + * necessary is flushed before another use of that BO, but for reuse from
> + * different caches within a batchbuffer, it's all our responsibility.
> + */
> +void
> +intel_render_cache_set_check_flush(struct intel_context *intel,
> +                                   drm_intel_bo *bo)
> +{
> +   if (!_mesa_set_search(intel->render_cache, _mesa_hash_pointer(bo), bo))
> +       return;
> +
> +   intel_batchbuffer_emit_mi_flush(intel);
> +}
> +
>   /**
>    * Do one-time context initializations related to GL_EXT_framebuffer_object.
>    * Hook in device driver functions.
> @@ -994,4 +1027,6 @@ intel_fbo_init(struct intel_context *intel)
>      intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer;
>      intel->ctx.Driver.EGLImageTargetRenderbufferStorage =
>         intel_image_target_renderbuffer_storage;
> +
> +   intel->render_cache = _mesa_set_create(intel, _mesa_key_pointer_equal);
>   }
> diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
> index 0e0806b..463b5bc 100644
> --- a/src/mesa/drivers/dri/intel/intel_fbo.h
> +++ b/src/mesa/drivers/dri/intel/intel_fbo.h
> @@ -200,6 +200,12 @@ void intel_renderbuffer_move_to_temp(struct intel_context *intel,
>                                        struct intel_renderbuffer *irb,
>                                        bool invalidate);
>
> +void intel_render_cache_set_clear(struct intel_context *intel);
> +void intel_render_cache_set_add_bo(struct intel_context *intel,
> +                                   drm_intel_bo *bo);
> +void intel_render_cache_set_check_flush(struct intel_context *intel,
> +                                        drm_intel_bo *bo);
> +
>   unsigned
>   intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
>
>



More information about the mesa-dev mailing list