[Mesa-dev] [PATCH] intel: Fix render-to-texture in non-FinishRenderTexture cases.

Ian Romanick idr at freedesktop.org
Tue May 7 16:46:55 PDT 2013


On 05/07/2013 03:53 PM, Eric Anholt wrote:
> With EGL_KHR_gl_renderbuffer_iamge, we have the ability to render to
                                ^^^^^
image

> renderbuffers that are also textures, so the core Mesa FinishRenderTexture
> hook doesn't get called.  That hook also wasn't called in various cases
> within the driver where we'd update texture contents using the render
> cache (like glCopyTexSubImage) that resulted in
> intel_batchbuffer_emit_mi_flush().
>
> To fix it, track a set of rendered-to BOs in our context, which is
> cleared at batch wrap or emit_mi_flush time, and do an emit_mi_flush if
> one of our textures is in that set.

That sounds like an optimal (if complex) solution for the EGLimage. 
When Ken described the bug to me, I was envisioning a simpler fix:  if a 
renderbuffer is attached to an EGLimage, use FinishRenderTexture.  Out 
of curiosity, do you think that would have also worked?  It seems like 
that would be cheaper in the non-EGLimage case, though it doesn't sound 
like it matters.

> This change doesn't turn the other emit_mi_flushes (such as intel_blit.c
> operations) into render_cache_set operations yet, as that would increase the
> size of our set and we expect that those operations get immediately flushed
> anyway.
>
> No statistically significant performance difference in cairo-gl (n=53/54, slow
> turbo outliers removed), despite spending ~1% CPU in these set operations.
>
> Fixes piglit EGL_KHR_gl_renderbuffer_image/renderbuffer-texture.

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>

NOTE: This is a candidate for the 9.1 branch.

> ---
>   src/mesa/drivers/dri/i915/i830_texstate.c      |  3 ++
>   src/mesa/drivers/dri/i915/i915_texstate.c      |  3 ++
>   src/mesa/drivers/dri/i915/intel_tris.c         | 22 ++++++++++++
>   src/mesa/drivers/dri/i965/brw_draw.c           | 23 +++++++++---
>   src/mesa/drivers/dri/i965/brw_misc_state.c     |  5 +++
>   src/mesa/drivers/dri/intel/intel_batchbuffer.c |  5 +++
>   src/mesa/drivers/dri/intel/intel_context.h     |  7 ++++
>   src/mesa/drivers/dri/intel/intel_fbo.c         | 49 ++++++++++++++++++++++----
>   src/mesa/drivers/dri/intel/intel_fbo.h         |  6 ++++
>   9 files changed, 112 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c
> index f186fac..6b1dbf0 100644
> --- a/src/mesa/drivers/dri/i915/i830_texstate.c
> +++ b/src/mesa/drivers/dri/i915/i830_texstate.c
> @@ -33,6 +33,7 @@
>
>   #include "intel_mipmap_tree.h"
>   #include "intel_tex.h"
> +#include "intel_fbo.h"
>
>   #include "i830_context.h"
>   #include "i830_reg.h"
> @@ -128,6 +129,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
>      GLubyte border[4];
>      GLuint dst_x, dst_y;
>
> +   intel_render_cache_set_check_flush(intel, intelObj->mt->region->bo);
> +
>      memset(state, 0, sizeof(*state));
>
>      /*We need to refcount these. */
> diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
> index 43c802b..148da15 100644
> --- a/src/mesa/drivers/dri/i915/i915_texstate.c
> +++ b/src/mesa/drivers/dri/i915/i915_texstate.c
> @@ -33,6 +33,7 @@
>
>   #include "intel_mipmap_tree.h"
>   #include "intel_tex.h"
> +#include "intel_fbo.h"
>
>   #include "i915_context.h"
>   #include "i915_reg.h"
> @@ -151,6 +152,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
>      GLubyte border[4];
>      GLfloat maxlod;
>
> +   intel_render_cache_set_check_flush(intel, intelObj->mt->region->bo);
> +
>      memset(state, 0, sizeof(*state));
>
>      /*We need to refcount these. */
> diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
> index 7c60d84..1f27243 100644
> --- a/src/mesa/drivers/dri/i915/intel_tris.c
> +++ b/src/mesa/drivers/dri/i915/intel_tris.c
> @@ -52,6 +52,8 @@
>   #include "intel_batchbuffer.h"
>   #include "intel_buffers.h"
>   #include "intel_reg.h"
> +#include "intel_fbo.h"
> +#include "intel_mipmap_tree.h"
>   #include "i830_context.h"
>   #include "i830_reg.h"
>   #include "i915_context.h"
> @@ -61,6 +63,22 @@ static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
>                                    GLuint hwprim);
>
>   static void
> +mark_render_cache(struct intel_context *intel)
> +{
> +   struct gl_context *ctx = &intel->ctx;
> +   struct gl_framebuffer *fb = ctx->DrawBuffer;
> +   struct intel_renderbuffer *depth_irb =
> +      intel_get_renderbuffer(fb, BUFFER_DEPTH);
> +   struct intel_renderbuffer *color_irb =
> +      intel_renderbuffer(fb->_ColorDrawBuffers[0]);
> +
> +   if (color_irb)
> +      intel_render_cache_set_add_bo(intel, color_irb->mt->region->bo);
> +   if (depth_irb)
> +      intel_render_cache_set_add_bo(intel, depth_irb->mt->region->bo);
> +}
> +
> +static void
>   intel_flush_inline_primitive(struct intel_context *intel)
>   {
>      GLuint used = intel->batch.used - intel->prim.start_ptr;
> @@ -75,6 +93,8 @@ intel_flush_inline_primitive(struct intel_context *intel)
>      intel->batch.map[intel->prim.start_ptr] =
>         _3DPRIMITIVE | intel->prim.primitive | (used - 2);
>
> +   mark_render_cache(intel);
> +
>      goto finished;
>
>    do_discard:
> @@ -310,6 +330,8 @@ void intel_flush_prim(struct intel_context *intel)
>         ADVANCE_BATCH();
>      }
>
> +   mark_render_cache(intel);
> +
>      if (intel->always_flush_cache) {
>         intel_batchbuffer_emit_mi_flush(intel);
>      }
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index 8c37e0b..670d648 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -292,8 +292,8 @@ static void brw_merge_inputs( struct brw_context *brw,
>   /*
>    * \brief Resolve buffers before drawing.
>    *
> - * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
> - * enabled depth texture.
> + * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
> + * enabled depth texture, and flush the render cache for any dirty textures.
>    *
>    * (In the future, this will also perform MSAA resolves).
>    */
> @@ -310,7 +310,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>      if (depth_irb)
>         intel_renderbuffer_resolve_hiz(intel, depth_irb);
>
> -   /* Resolve depth buffer of each enabled depth texture. */
> +   /* Resolve depth buffer and render cache of each enabled texture. */
>      for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
>         if (!ctx->Texture.Unit[i]._ReallyEnabled)
>   	 continue;
> @@ -318,6 +318,8 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>         if (!tex_obj || !tex_obj->mt)
>   	 continue;
>         intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
> +
> +      intel_render_cache_set_check_flush(intel, tex_obj->mt->region->bo);
>      }
>   }
>
> @@ -329,6 +331,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
>    *
>    * If the color buffer is a multisample window system buffer, then
>    * mark that it needs a downsample.
> + *
> + * For all render targets that might be textured (everything but stencil), mark
> + * that it needs a render cache flush if it will be textured.

We will eventually support stencil texturing, but it looks like that 
will be trivial to add here.  Yeah?

>    */
>   static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>   {
> @@ -347,8 +352,18 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>         intel_renderbuffer_set_needs_downsample(front_irb);
>      if (back_irb)
>         intel_renderbuffer_set_needs_downsample(back_irb);
> -   if (depth_irb && ctx->Depth.Mask)
> +   if (depth_irb && ctx->Depth.Mask) {
>         intel_renderbuffer_set_needs_depth_resolve(depth_irb);
> +      intel_render_cache_set_add_bo(intel, depth_irb->mt->region->bo);
> +   }
> +
> +   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
> +      struct intel_renderbuffer *irb =
> +         intel_renderbuffer(fb->_ColorDrawBuffers[i]);
> +
> +      if (irb)
> +         intel_render_cache_set_add_bo(intel, irb->mt->region->bo);
> +   }
>   }
>
>   static int
> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
> index 6b61929..d7dafe2 100644
> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> @@ -641,6 +641,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
>         height = stencil_irb->Base.Base.Height;
>      }
>
> +   if (depth_mt)
> +      intel_render_cache_set_check_flush(intel, depth_mt->region->bo);
> +   if (stencil_mt)
> +      intel_render_cache_set_check_flush(intel, stencil_mt->region->bo);
> +
>      intel->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
>                                         depthbuffer_format, depth_surface_type,
>                                         stencil_mt, hiz, separate_stencil,
> diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> index 8c6524e..a3ec89a 100644
> --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> @@ -31,6 +31,7 @@
>   #include "intel_reg.h"
>   #include "intel_bufmgr.h"
>   #include "intel_buffers.h"
> +#include "intel_fbo.h"
>
>   static void
>   intel_batchbuffer_reset(struct intel_context *intel);
> @@ -273,6 +274,8 @@ _intel_batchbuffer_flush(struct intel_context *intel,
>       */
>      intel_batchbuffer_reset(intel);
>
> +   intel_render_cache_set_clear(intel);
> +
>      return ret;
>   }
>
> @@ -557,4 +560,6 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
>         OUT_BATCH(MI_FLUSH);
>         ADVANCE_BATCH();
>      }
> +
> +   intel_render_cache_set_clear(intel);
>   }
> diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
> index c0f07ff..3cf7f32 100644
> --- a/src/mesa/drivers/dri/intel/intel_context.h
> +++ b/src/mesa/drivers/dri/intel/intel_context.h
> @@ -249,6 +249,13 @@ struct intel_context
>
>      drm_intel_context *hw_ctx;
>
> +   /**
> +    * Set of drm_intel_bo * that have been rendered to within this batchbuffer
> +    * and would need flushing before being used from another cache domain that
> +    * isn't coherent with it (i.e. the sampler).
> +    */
> +   struct set *render_cache;
> +
>      struct intel_batchbuffer batch;
>
>      drm_intel_bo *first_post_swapbuffers_batch;
> diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
> index 2e36b0b..a621c2e 100644
> --- a/src/mesa/drivers/dri/intel/intel_fbo.c
> +++ b/src/mesa/drivers/dri/intel/intel_fbo.c
> @@ -36,6 +36,8 @@
>   #include "main/context.h"
>   #include "main/teximage.h"
>   #include "main/image.h"
> +#include "main/hash_table.h"
> +#include "main/set.h"
>
>   #include "swrast/swrast.h"
>   #include "drivers/common/meta.h"
> @@ -631,13 +633,6 @@ intel_finish_render_texture(struct gl_context * ctx,
>
>      if (irb)
>         irb->tex_image = NULL;
> -
> -   /* Since we've (probably) rendered to the texture and will (likely) use
> -    * it in the texture domain later on in this batchbuffer, flush the
> -    * batch.  Once again, we wish for a domain tracker in libdrm to cover
> -    * usage inside of a batchbuffer like GEM does in the kernel.
> -    */
> -   intel_batchbuffer_emit_mi_flush(intel);
>   }
>
>   #define fbo_incomplete(fb, ...) do {                                          \
> @@ -974,6 +969,44 @@ intel_renderbuffer_move_to_temp(struct intel_context *intel,
>      intel_miptree_release(&new_mt);
>   }
>
> +void
> +intel_render_cache_set_clear(struct intel_context *intel)
> +{
> +   struct set_entry *entry;
> +
> +   set_foreach(intel->render_cache, entry) {
> +      _mesa_set_remove(intel->render_cache, entry);
> +   }
> +}
> +
> +void
> +intel_render_cache_set_add_bo(struct intel_context *intel, drm_intel_bo *bo)
> +{
> +   _mesa_set_add(intel->render_cache, _mesa_hash_pointer(bo), bo);
> +}
> +
> +/**
> + * Emits an appropriate flush for a BO if it has been rendered to within the
> + * same batchbuffer as a read that's about to be emitted.
> + *
> + * The GPU has separate, incoherent caches for the render cache and the
> + * sampler cache, along with other caches.  Usually data in the different
> + * caches don't interact (e.g. we don't render to our driver-generated
> + * immediate constant data), but for render-to-texture in FBOs we definitely
> + * do.  When a batchbuffer is flushed, the kernel will ensure that everything
> + * necessary is flushed before another use of that BO, but for reuse from
> + * different caches within a batchbuffer, it's all our responsibility.
> + */
> +void
> +intel_render_cache_set_check_flush(struct intel_context *intel,
> +                                   drm_intel_bo *bo)
> +{
> +   if (!_mesa_set_search(intel->render_cache, _mesa_hash_pointer(bo), bo))
> +       return;
> +
> +   intel_batchbuffer_emit_mi_flush(intel);
> +}
> +
>   /**
>    * Do one-time context initializations related to GL_EXT_framebuffer_object.
>    * Hook in device driver functions.
> @@ -994,4 +1027,6 @@ intel_fbo_init(struct intel_context *intel)
>      intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer;
>      intel->ctx.Driver.EGLImageTargetRenderbufferStorage =
>         intel_image_target_renderbuffer_storage;
> +
> +   intel->render_cache = _mesa_set_create(intel, _mesa_key_pointer_equal);
>   }
> diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
> index 0e0806b..463b5bc 100644
> --- a/src/mesa/drivers/dri/intel/intel_fbo.h
> +++ b/src/mesa/drivers/dri/intel/intel_fbo.h
> @@ -200,6 +200,12 @@ void intel_renderbuffer_move_to_temp(struct intel_context *intel,
>                                        struct intel_renderbuffer *irb,
>                                        bool invalidate);
>
> +void intel_render_cache_set_clear(struct intel_context *intel);
> +void intel_render_cache_set_add_bo(struct intel_context *intel,
> +                                   drm_intel_bo *bo);
> +void intel_render_cache_set_check_flush(struct intel_context *intel,
> +                                        drm_intel_bo *bo);
> +
>   unsigned
>   intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
>
>



More information about the mesa-dev mailing list