[Mesa-dev] [PATCH] intel: Fix render-to-texture in non-FinishRenderTexture cases.
Ian Romanick
idr at freedesktop.org
Tue May 7 16:46:55 PDT 2013
On 05/07/2013 03:53 PM, Eric Anholt wrote:
> With EGL_KHR_gl_renderbuffer_iamge, we have the ability to render to
^^^^^
image
> renderbuffers that are also textures, so the core Mesa FinishRenderTexture
> hook doesn't get called. That hook also wasn't called in various cases
> within the driver where we'd update texture contents using the render
> cache (like glCopyTexSubImage) that resulted in
> intel_batchbuffer_emit_mi_flush().
>
> To fix it, track a set of rendered-to BOs in our context, which is
> cleared at batch wrap or emit_mi_flush time, and do an emit_mi_flush if
> one of our textures is in that set.
That sounds like an optimal (if complex) solution for the EGLimage.
When Ken described the bug to me, I was envisioning a simpler fix: if a
renderbuffer is attached to an EGLimage, use FinishRenderTexture. Out
of curiosity, do you think that would have also worked? It seems like
that would be cheaper in the non-EGLimage case, though it doesn't sound
like it matters.
> This change doesn't turn the other emit_mi_flushes (such as intel_blit.c
> operations) into render_cache_set operations yet, as that would increase the
> size of our set and we expect that those operations get immediately flushed
> anyway.
>
> No statistically significant performance difference in cairo-gl (n=53/54, slow
> turbo outliers removed), despite spending ~1% CPU in these set operations.
>
> Fixes piglit EGL_KHR_gl_renderbuffer_image/renderbuffer-texture.
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
NOTE: This is a candidate for the 9.1 branch.
> ---
> src/mesa/drivers/dri/i915/i830_texstate.c | 3 ++
> src/mesa/drivers/dri/i915/i915_texstate.c | 3 ++
> src/mesa/drivers/dri/i915/intel_tris.c | 22 ++++++++++++
> src/mesa/drivers/dri/i965/brw_draw.c | 23 +++++++++---
> src/mesa/drivers/dri/i965/brw_misc_state.c | 5 +++
> src/mesa/drivers/dri/intel/intel_batchbuffer.c | 5 +++
> src/mesa/drivers/dri/intel/intel_context.h | 7 ++++
> src/mesa/drivers/dri/intel/intel_fbo.c | 49 ++++++++++++++++++++++----
> src/mesa/drivers/dri/intel/intel_fbo.h | 6 ++++
> 9 files changed, 112 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c
> index f186fac..6b1dbf0 100644
> --- a/src/mesa/drivers/dri/i915/i830_texstate.c
> +++ b/src/mesa/drivers/dri/i915/i830_texstate.c
> @@ -33,6 +33,7 @@
>
> #include "intel_mipmap_tree.h"
> #include "intel_tex.h"
> +#include "intel_fbo.h"
>
> #include "i830_context.h"
> #include "i830_reg.h"
> @@ -128,6 +129,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
> GLubyte border[4];
> GLuint dst_x, dst_y;
>
> + intel_render_cache_set_check_flush(intel, intelObj->mt->region->bo);
> +
> memset(state, 0, sizeof(*state));
>
> /*We need to refcount these. */
> diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
> index 43c802b..148da15 100644
> --- a/src/mesa/drivers/dri/i915/i915_texstate.c
> +++ b/src/mesa/drivers/dri/i915/i915_texstate.c
> @@ -33,6 +33,7 @@
>
> #include "intel_mipmap_tree.h"
> #include "intel_tex.h"
> +#include "intel_fbo.h"
>
> #include "i915_context.h"
> #include "i915_reg.h"
> @@ -151,6 +152,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
> GLubyte border[4];
> GLfloat maxlod;
>
> + intel_render_cache_set_check_flush(intel, intelObj->mt->region->bo);
> +
> memset(state, 0, sizeof(*state));
>
> /*We need to refcount these. */
> diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
> index 7c60d84..1f27243 100644
> --- a/src/mesa/drivers/dri/i915/intel_tris.c
> +++ b/src/mesa/drivers/dri/i915/intel_tris.c
> @@ -52,6 +52,8 @@
> #include "intel_batchbuffer.h"
> #include "intel_buffers.h"
> #include "intel_reg.h"
> +#include "intel_fbo.h"
> +#include "intel_mipmap_tree.h"
> #include "i830_context.h"
> #include "i830_reg.h"
> #include "i915_context.h"
> @@ -61,6 +63,22 @@ static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
> GLuint hwprim);
>
> static void
> +mark_render_cache(struct intel_context *intel)
> +{
> + struct gl_context *ctx = &intel->ctx;
> + struct gl_framebuffer *fb = ctx->DrawBuffer;
> + struct intel_renderbuffer *depth_irb =
> + intel_get_renderbuffer(fb, BUFFER_DEPTH);
> + struct intel_renderbuffer *color_irb =
> + intel_renderbuffer(fb->_ColorDrawBuffers[0]);
> +
> + if (color_irb)
> + intel_render_cache_set_add_bo(intel, color_irb->mt->region->bo);
> + if (depth_irb)
> + intel_render_cache_set_add_bo(intel, depth_irb->mt->region->bo);
> +}
> +
> +static void
> intel_flush_inline_primitive(struct intel_context *intel)
> {
> GLuint used = intel->batch.used - intel->prim.start_ptr;
> @@ -75,6 +93,8 @@ intel_flush_inline_primitive(struct intel_context *intel)
> intel->batch.map[intel->prim.start_ptr] =
> _3DPRIMITIVE | intel->prim.primitive | (used - 2);
>
> + mark_render_cache(intel);
> +
> goto finished;
>
> do_discard:
> @@ -310,6 +330,8 @@ void intel_flush_prim(struct intel_context *intel)
> ADVANCE_BATCH();
> }
>
> + mark_render_cache(intel);
> +
> if (intel->always_flush_cache) {
> intel_batchbuffer_emit_mi_flush(intel);
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index 8c37e0b..670d648 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -292,8 +292,8 @@ static void brw_merge_inputs( struct brw_context *brw,
> /*
> * \brief Resolve buffers before drawing.
> *
> - * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
> - * enabled depth texture.
> + * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
> + * enabled depth texture, and flush the render cache for any dirty textures.
> *
> * (In the future, this will also perform MSAA resolves).
> */
> @@ -310,7 +310,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
> if (depth_irb)
> intel_renderbuffer_resolve_hiz(intel, depth_irb);
>
> - /* Resolve depth buffer of each enabled depth texture. */
> + /* Resolve depth buffer and render cache of each enabled texture. */
> for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
> if (!ctx->Texture.Unit[i]._ReallyEnabled)
> continue;
> @@ -318,6 +318,8 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
> if (!tex_obj || !tex_obj->mt)
> continue;
> intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
> +
> + intel_render_cache_set_check_flush(intel, tex_obj->mt->region->bo);
> }
> }
>
> @@ -329,6 +331,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
> *
> * If the color buffer is a multisample window system buffer, then
> * mark that it needs a downsample.
> + *
> + * For all render targets that might be textured (everything but stencil), mark
> + * that it needs a render cache flush if it will be textured.
We will eventually support stencil texturing, but it looks like that
will be trivial to add here. Yeah?
> */
> static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
> {
> @@ -347,8 +352,18 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
> intel_renderbuffer_set_needs_downsample(front_irb);
> if (back_irb)
> intel_renderbuffer_set_needs_downsample(back_irb);
> - if (depth_irb && ctx->Depth.Mask)
> + if (depth_irb && ctx->Depth.Mask) {
> intel_renderbuffer_set_needs_depth_resolve(depth_irb);
> + intel_render_cache_set_add_bo(intel, depth_irb->mt->region->bo);
> + }
> +
> + for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
> + struct intel_renderbuffer *irb =
> + intel_renderbuffer(fb->_ColorDrawBuffers[i]);
> +
> + if (irb)
> + intel_render_cache_set_add_bo(intel, irb->mt->region->bo);
> + }
> }
>
> static int
> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
> index 6b61929..d7dafe2 100644
> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> @@ -641,6 +641,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
> height = stencil_irb->Base.Base.Height;
> }
>
> + if (depth_mt)
> + intel_render_cache_set_check_flush(intel, depth_mt->region->bo);
> + if (stencil_mt)
> + intel_render_cache_set_check_flush(intel, stencil_mt->region->bo);
> +
> intel->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
> depthbuffer_format, depth_surface_type,
> stencil_mt, hiz, separate_stencil,
> diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> index 8c6524e..a3ec89a 100644
> --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> @@ -31,6 +31,7 @@
> #include "intel_reg.h"
> #include "intel_bufmgr.h"
> #include "intel_buffers.h"
> +#include "intel_fbo.h"
>
> static void
> intel_batchbuffer_reset(struct intel_context *intel);
> @@ -273,6 +274,8 @@ _intel_batchbuffer_flush(struct intel_context *intel,
> */
> intel_batchbuffer_reset(intel);
>
> + intel_render_cache_set_clear(intel);
> +
> return ret;
> }
>
> @@ -557,4 +560,6 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
> OUT_BATCH(MI_FLUSH);
> ADVANCE_BATCH();
> }
> +
> + intel_render_cache_set_clear(intel);
> }
> diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
> index c0f07ff..3cf7f32 100644
> --- a/src/mesa/drivers/dri/intel/intel_context.h
> +++ b/src/mesa/drivers/dri/intel/intel_context.h
> @@ -249,6 +249,13 @@ struct intel_context
>
> drm_intel_context *hw_ctx;
>
> + /**
> + * Set of drm_intel_bo * that have been rendered to within this batchbuffer
> + * and would need flushing before being used from another cache domain that
> + * isn't coherent with it (i.e. the sampler).
> + */
> + struct set *render_cache;
> +
> struct intel_batchbuffer batch;
>
> drm_intel_bo *first_post_swapbuffers_batch;
> diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
> index 2e36b0b..a621c2e 100644
> --- a/src/mesa/drivers/dri/intel/intel_fbo.c
> +++ b/src/mesa/drivers/dri/intel/intel_fbo.c
> @@ -36,6 +36,8 @@
> #include "main/context.h"
> #include "main/teximage.h"
> #include "main/image.h"
> +#include "main/hash_table.h"
> +#include "main/set.h"
>
> #include "swrast/swrast.h"
> #include "drivers/common/meta.h"
> @@ -631,13 +633,6 @@ intel_finish_render_texture(struct gl_context * ctx,
>
> if (irb)
> irb->tex_image = NULL;
> -
> - /* Since we've (probably) rendered to the texture and will (likely) use
> - * it in the texture domain later on in this batchbuffer, flush the
> - * batch. Once again, we wish for a domain tracker in libdrm to cover
> - * usage inside of a batchbuffer like GEM does in the kernel.
> - */
> - intel_batchbuffer_emit_mi_flush(intel);
> }
>
> #define fbo_incomplete(fb, ...) do { \
> @@ -974,6 +969,44 @@ intel_renderbuffer_move_to_temp(struct intel_context *intel,
> intel_miptree_release(&new_mt);
> }
>
> +void
> +intel_render_cache_set_clear(struct intel_context *intel)
> +{
> + struct set_entry *entry;
> +
> + set_foreach(intel->render_cache, entry) {
> + _mesa_set_remove(intel->render_cache, entry);
> + }
> +}
> +
> +void
> +intel_render_cache_set_add_bo(struct intel_context *intel, drm_intel_bo *bo)
> +{
> + _mesa_set_add(intel->render_cache, _mesa_hash_pointer(bo), bo);
> +}
> +
> +/**
> + * Emits an appropriate flush for a BO if it has been rendered to within the
> + * same batchbuffer as a read that's about to be emitted.
> + *
> + * The GPU has separate, incoherent caches for the render cache and the
> + * sampler cache, along with other caches. Usually data in the different
> + * caches don't interact (e.g. we don't render to our driver-generated
> + * immediate constant data), but for render-to-texture in FBOs we definitely
> + * do. When a batchbuffer is flushed, the kernel will ensure that everything
> + * necessary is flushed before another use of that BO, but for reuse from
> + * different caches within a batchbuffer, it's all our responsibility.
> + */
> +void
> +intel_render_cache_set_check_flush(struct intel_context *intel,
> + drm_intel_bo *bo)
> +{
> + if (!_mesa_set_search(intel->render_cache, _mesa_hash_pointer(bo), bo))
> + return;
> +
> + intel_batchbuffer_emit_mi_flush(intel);
> +}
> +
> /**
> * Do one-time context initializations related to GL_EXT_framebuffer_object.
> * Hook in device driver functions.
> @@ -994,4 +1027,6 @@ intel_fbo_init(struct intel_context *intel)
> intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer;
> intel->ctx.Driver.EGLImageTargetRenderbufferStorage =
> intel_image_target_renderbuffer_storage;
> +
> + intel->render_cache = _mesa_set_create(intel, _mesa_key_pointer_equal);
> }
> diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
> index 0e0806b..463b5bc 100644
> --- a/src/mesa/drivers/dri/intel/intel_fbo.h
> +++ b/src/mesa/drivers/dri/intel/intel_fbo.h
> @@ -200,6 +200,12 @@ void intel_renderbuffer_move_to_temp(struct intel_context *intel,
> struct intel_renderbuffer *irb,
> bool invalidate);
>
> +void intel_render_cache_set_clear(struct intel_context *intel);
> +void intel_render_cache_set_add_bo(struct intel_context *intel,
> + drm_intel_bo *bo);
> +void intel_render_cache_set_check_flush(struct intel_context *intel,
> + drm_intel_bo *bo);
> +
> unsigned
> intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
>
>
More information about the mesa-dev
mailing list