[Mesa-dev] [PATCH] intel: Fix render-to-texture in non-FinishRenderTexture cases.

Eric Anholt eric at anholt.net
Tue May 7 15:53:13 PDT 2013


With EGL_KHR_gl_renderbuffer_iamge, we have the ability to render to
renderbuffers that are also textures, so the core Mesa FinishRenderTexture
hook doesn't get called.  That hook also wasn't called in various cases
within the driver where we'd update texture contents using the render
cache (like glCopyTexSubImage) that resulted in
intel_batchbuffer_emit_mi_flush().

To fix it, track a set of rendered-to BOs in our context, which is
cleared at batch wrap or emit_mi_flush time, and do an emit_mi_flush if
one of our textures is in that set.

This change doesn't turn the other emit_mi_flushes (such as intel_blit.c
operations) into render_cache_set operations yet, as that would increase the
size of our set and we expect that those operations get immediately flushed
anyway.

No statistically significant performance difference in cairo-gl (n=53/54, slow
turbo outliers removed), despite spending ~1% CPU in these set operations.

Fixes piglit EGL_KHR_gl_renderbuffer_image/renderbuffer-texture.
---
 src/mesa/drivers/dri/i915/i830_texstate.c      |  3 ++
 src/mesa/drivers/dri/i915/i915_texstate.c      |  3 ++
 src/mesa/drivers/dri/i915/intel_tris.c         | 22 ++++++++++++
 src/mesa/drivers/dri/i965/brw_draw.c           | 23 +++++++++---
 src/mesa/drivers/dri/i965/brw_misc_state.c     |  5 +++
 src/mesa/drivers/dri/intel/intel_batchbuffer.c |  5 +++
 src/mesa/drivers/dri/intel/intel_context.h     |  7 ++++
 src/mesa/drivers/dri/intel/intel_fbo.c         | 49 ++++++++++++++++++++++----
 src/mesa/drivers/dri/intel/intel_fbo.h         |  6 ++++
 9 files changed, 112 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c
index f186fac..6b1dbf0 100644
--- a/src/mesa/drivers/dri/i915/i830_texstate.c
+++ b/src/mesa/drivers/dri/i915/i830_texstate.c
@@ -33,6 +33,7 @@
 
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
+#include "intel_fbo.h"
 
 #include "i830_context.h"
 #include "i830_reg.h"
@@ -128,6 +129,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
    GLubyte border[4];
    GLuint dst_x, dst_y;
 
+   intel_render_cache_set_check_flush(intel, intelObj->mt->region->bo);
+
    memset(state, 0, sizeof(*state));
 
    /*We need to refcount these. */
diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
index 43c802b..148da15 100644
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -33,6 +33,7 @@
 
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
+#include "intel_fbo.h"
 
 #include "i915_context.h"
 #include "i915_reg.h"
@@ -151,6 +152,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
    GLubyte border[4];
    GLfloat maxlod;
 
+   intel_render_cache_set_check_flush(intel, intelObj->mt->region->bo);
+
    memset(state, 0, sizeof(*state));
 
    /*We need to refcount these. */
diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
index 7c60d84..1f27243 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -52,6 +52,8 @@
 #include "intel_batchbuffer.h"
 #include "intel_buffers.h"
 #include "intel_reg.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
 #include "i830_context.h"
 #include "i830_reg.h"
 #include "i915_context.h"
@@ -61,6 +63,22 @@ static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
                                  GLuint hwprim);
 
 static void
+mark_render_cache(struct intel_context *intel)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct intel_renderbuffer *depth_irb =
+      intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   struct intel_renderbuffer *color_irb =
+      intel_renderbuffer(fb->_ColorDrawBuffers[0]);
+
+   if (color_irb)
+      intel_render_cache_set_add_bo(intel, color_irb->mt->region->bo);
+   if (depth_irb)
+      intel_render_cache_set_add_bo(intel, depth_irb->mt->region->bo);
+}
+
+static void
 intel_flush_inline_primitive(struct intel_context *intel)
 {
    GLuint used = intel->batch.used - intel->prim.start_ptr;
@@ -75,6 +93,8 @@ intel_flush_inline_primitive(struct intel_context *intel)
    intel->batch.map[intel->prim.start_ptr] =
       _3DPRIMITIVE | intel->prim.primitive | (used - 2);
 
+   mark_render_cache(intel);
+
    goto finished;
 
  do_discard:
@@ -310,6 +330,8 @@ void intel_flush_prim(struct intel_context *intel)
       ADVANCE_BATCH();
    }
 
+   mark_render_cache(intel);
+
    if (intel->always_flush_cache) {
       intel_batchbuffer_emit_mi_flush(intel);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 8c37e0b..670d648 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -292,8 +292,8 @@ static void brw_merge_inputs( struct brw_context *brw,
 /*
  * \brief Resolve buffers before drawing.
  *
- * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
- * enabled depth texture.
+ * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
+ * enabled depth texture, and flush the render cache for any dirty textures.
  *
  * (In the future, this will also perform MSAA resolves).
  */
@@ -310,7 +310,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
    if (depth_irb)
       intel_renderbuffer_resolve_hiz(intel, depth_irb);
 
-   /* Resolve depth buffer of each enabled depth texture. */
+   /* Resolve depth buffer and render cache of each enabled texture. */
    for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
       if (!ctx->Texture.Unit[i]._ReallyEnabled)
 	 continue;
@@ -318,6 +318,8 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
       if (!tex_obj || !tex_obj->mt)
 	 continue;
       intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
+
+      intel_render_cache_set_check_flush(intel, tex_obj->mt->region->bo);
    }
 }
 
@@ -329,6 +331,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
  *
  * If the color buffer is a multisample window system buffer, then
  * mark that it needs a downsample.
+ *
+ * For all render targets that might be textured (everything but stencil), mark
+ * that it needs a render cache flush if it will be textured.
  */
 static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
 {
@@ -347,8 +352,18 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
       intel_renderbuffer_set_needs_downsample(front_irb);
    if (back_irb)
       intel_renderbuffer_set_needs_downsample(back_irb);
-   if (depth_irb && ctx->Depth.Mask)
+   if (depth_irb && ctx->Depth.Mask) {
       intel_renderbuffer_set_needs_depth_resolve(depth_irb);
+      intel_render_cache_set_add_bo(intel, depth_irb->mt->region->bo);
+   }
+
+   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+      struct intel_renderbuffer *irb =
+         intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+      if (irb)
+         intel_render_cache_set_add_bo(intel, irb->mt->region->bo);
+   }
 }
 
 static int
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 6b61929..d7dafe2 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -641,6 +641,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
       height = stencil_irb->Base.Base.Height;
    }
 
+   if (depth_mt)
+      intel_render_cache_set_check_flush(intel, depth_mt->region->bo);
+   if (stencil_mt)
+      intel_render_cache_set_check_flush(intel, stencil_mt->region->bo);
+
    intel->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                       depthbuffer_format, depth_surface_type,
                                       stencil_mt, hiz, separate_stencil,
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 8c6524e..a3ec89a 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -31,6 +31,7 @@
 #include "intel_reg.h"
 #include "intel_bufmgr.h"
 #include "intel_buffers.h"
+#include "intel_fbo.h"
 
 static void
 intel_batchbuffer_reset(struct intel_context *intel);
@@ -273,6 +274,8 @@ _intel_batchbuffer_flush(struct intel_context *intel,
     */
    intel_batchbuffer_reset(intel);
 
+   intel_render_cache_set_clear(intel);
+
    return ret;
 }
 
@@ -557,4 +560,6 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
       OUT_BATCH(MI_FLUSH);
       ADVANCE_BATCH();
    }
+
+   intel_render_cache_set_clear(intel);
 }
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index c0f07ff..3cf7f32 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -249,6 +249,13 @@ struct intel_context
 
    drm_intel_context *hw_ctx;
 
+   /**
+    * Set of drm_intel_bo * that have been rendered to within this batchbuffer
+    * and would need flushing before being used from another cache domain that
+    * isn't coherent with it (i.e. the sampler).
+    */
+   struct set *render_cache;
+
    struct intel_batchbuffer batch;
 
    drm_intel_bo *first_post_swapbuffers_batch;
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 2e36b0b..a621c2e 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -36,6 +36,8 @@
 #include "main/context.h"
 #include "main/teximage.h"
 #include "main/image.h"
+#include "main/hash_table.h"
+#include "main/set.h"
 
 #include "swrast/swrast.h"
 #include "drivers/common/meta.h"
@@ -631,13 +633,6 @@ intel_finish_render_texture(struct gl_context * ctx,
 
    if (irb)
       irb->tex_image = NULL;
-
-   /* Since we've (probably) rendered to the texture and will (likely) use
-    * it in the texture domain later on in this batchbuffer, flush the
-    * batch.  Once again, we wish for a domain tracker in libdrm to cover
-    * usage inside of a batchbuffer like GEM does in the kernel.
-    */
-   intel_batchbuffer_emit_mi_flush(intel);
 }
 
 #define fbo_incomplete(fb, ...) do {                                          \
@@ -974,6 +969,44 @@ intel_renderbuffer_move_to_temp(struct intel_context *intel,
    intel_miptree_release(&new_mt);
 }
 
+void
+intel_render_cache_set_clear(struct intel_context *intel)
+{
+   struct set_entry *entry;
+
+   set_foreach(intel->render_cache, entry) {
+      _mesa_set_remove(intel->render_cache, entry);
+   }
+}
+
+void
+intel_render_cache_set_add_bo(struct intel_context *intel, drm_intel_bo *bo)
+{
+   _mesa_set_add(intel->render_cache, _mesa_hash_pointer(bo), bo);
+}
+
+/**
+ * Emits an appropriate flush for a BO if it has been rendered to within the
+ * same batchbuffer as a read that's about to be emitted.
+ *
+ * The GPU has separate, incoherent caches for the render cache and the
+ * sampler cache, along with other caches.  Usually data in the different
+ * caches don't interact (e.g. we don't render to our driver-generated
+ * immediate constant data), but for render-to-texture in FBOs we definitely
+ * do.  When a batchbuffer is flushed, the kernel will ensure that everything
+ * necessary is flushed before another use of that BO, but for reuse from
+ * different caches within a batchbuffer, it's all our responsibility.
+ */
+void
+intel_render_cache_set_check_flush(struct intel_context *intel,
+                                   drm_intel_bo *bo)
+{
+   if (!_mesa_set_search(intel->render_cache, _mesa_hash_pointer(bo), bo))
+       return;
+
+   intel_batchbuffer_emit_mi_flush(intel);
+}
+
 /**
  * Do one-time context initializations related to GL_EXT_framebuffer_object.
  * Hook in device driver functions.
@@ -994,4 +1027,6 @@ intel_fbo_init(struct intel_context *intel)
    intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer;
    intel->ctx.Driver.EGLImageTargetRenderbufferStorage =
       intel_image_target_renderbuffer_storage;
+
+   intel->render_cache = _mesa_set_create(intel, _mesa_key_pointer_equal);
 }
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index 0e0806b..463b5bc 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -200,6 +200,12 @@ void intel_renderbuffer_move_to_temp(struct intel_context *intel,
                                      struct intel_renderbuffer *irb,
                                      bool invalidate);
 
+void intel_render_cache_set_clear(struct intel_context *intel);
+void intel_render_cache_set_add_bo(struct intel_context *intel,
+                                   drm_intel_bo *bo);
+void intel_render_cache_set_check_flush(struct intel_context *intel,
+                                        drm_intel_bo *bo);
+
 unsigned
 intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
 
-- 
1.8.3.rc0



More information about the mesa-dev mailing list