[Mesa-dev] [PATCH 1/2] i965: Fix render-to-texture in non-FinishRenderTexture cases.
Kenneth Graunke
kenneth at whitecape.org
Wed Mar 5 16:37:17 PST 2014
On 03/05/2014 04:17 PM, Eric Anholt wrote:
> We've had several problems now with FinishRenderTexture not getting called
> enough, and we're ready to just give up on it ever doing what we need. In
> particular, an upcoming Steam title had rendering bugs that could be fixed
> by always_flush_cache=true.
>
> Instead of hoping Mesa core can figure out when we need to flush our
> caches, just track what BOs we've rendered to in a set, and when we render
> from a BO in that set, emit a flush and clear the set.
>
> There's some overhead to keeping this set, but most of that is just
> hashing the pointer -- it turns out our set never even gets very large,
> because cache flushes are so common (even on cairo-gl).
>
> No statistically significant performance difference in cairo-gl (n=100),
> despite spending ~.5% CPU in these set operations.
>
> v1: (Original patch by Eric Anholt.)
> v2: (Changes by Ken Graunke.)
> - Rebase forward from May 7th 2013 -> March 4th 2014.
> - Drop the FinishRenderTexture hook entirely; after rebasing the
> patch, the hook was just an empty function.
> - Move the brw_render_cache_set_clear() call from
> intel_batchbuffer_emit_flush() to brw_emit_pipe_control_flush().
> In theory, this could catch more cases where we've flushed.
> - Consider stencil as a possible texturing source.
> v3: (changes by anholt):
> - Move set_clear() back to emit_mi_flush() -- it means we can drop
> more forced flushes from the code. In the previous location, it
> wouldn't have been called when we wanted pre-gen6.
> - Move the set clear from batch init to reset -- it should be empty at
> the start of every batch, since the kernel handled any inter-batch
> flush for us.
>
> Signed-off-by: Eric Anholt <eric at anholt.net>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/brw_context.c | 4 +-
> src/mesa/drivers/dri/i965/brw_context.h | 7 +++
> src/mesa/drivers/dri/i965/brw_draw.c | 30 +++++++++---
> src/mesa/drivers/dri/i965/brw_misc_state.c | 5 ++
> src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++
> src/mesa/drivers/dri/i965/intel_fbo.c | 66 +++++++++++++++++++--------
> src/mesa/drivers/dri/i965/intel_fbo.h | 4 ++
> src/mesa/main/set.c | 6 +++
> 8 files changed, 99 insertions(+), 27 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 1441b46..026b3aa 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -698,6 +698,8 @@ brwCreateContext(gl_api api,
> /* Reinitialize the context point state. It depends on ctx->Const values. */
> _mesa_init_point(ctx);
>
> + intel_fbo_init(brw);
> +
> intel_batchbuffer_init(brw);
>
> if (brw->gen >= 6) {
> @@ -721,8 +723,6 @@ brwCreateContext(gl_api api,
>
> intelInitExtensions(ctx);
>
> - intel_fbo_init(brw);
> -
> brw_init_surface_formats(brw);
>
> if (brw->is_g4x || brw->gen >= 5) {
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index dbb30f2..734d273 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1030,6 +1030,13 @@ struct brw_context
> drm_intel_context *hw_ctx;
>
> /**
> + * Set of drm_intel_bo * that have been rendered to within this batchbuffer
> + * and would need flushing before being used from another cache domain that
> + * isn't coherent with it (i.e. the sampler).
> + */
> + struct set *render_cache;
> +
> + /**
> * Number of resets observed in the system at context creation.
> *
> * This is tracked in the context so that we can determine that another
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index bc887fe..11a0361 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -297,8 +297,8 @@ static void brw_merge_inputs( struct brw_context *brw,
> /*
> * \brief Resolve buffers before drawing.
> *
> - * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
> - * enabled depth texture.
> + * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
> + * enabled depth texture, and flush the render cache for any dirty textures.
> *
> * (In the future, this will also perform MSAA resolves).
> */
> @@ -314,9 +314,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
> if (depth_irb)
> intel_renderbuffer_resolve_hiz(brw, depth_irb);
>
> - /* Resolve depth buffer of each enabled depth texture, and color buffer of
> - * each fast-clear-enabled color texture.
> - */
> + /* Resolve depth buffer and render cache of each enabled texture. */
> for (int i = 0; i < ctx->Const.MaxCombinedTextureImageUnits; i++) {
> if (!ctx->Texture.Unit[i]._ReallyEnabled)
> continue;
> @@ -325,6 +323,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
> continue;
> intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
> intel_miptree_resolve_color(brw, tex_obj->mt);
> + brw_render_cache_set_check_flush(brw, tex_obj->mt->region->bo);
> }
> }
>
> @@ -336,6 +335,9 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
> *
> * If the color buffer is a multisample window system buffer, then
> * mark that it needs a downsample.
> + *
> + * Also mark any render targets which will be textured as needing a render
> + * cache flush.
> */
> static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
> {
> @@ -345,6 +347,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
> struct intel_renderbuffer *front_irb = NULL;
> struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
> struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
> + struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
> struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
>
> if (brw->is_front_buffer_rendering)
> @@ -354,8 +357,23 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
> front_irb->need_downsample = true;
> if (back_irb)
> back_irb->need_downsample = true;
> - if (depth_irb && ctx->Depth.Mask)
> + if (depth_irb && ctx->Depth.Mask) {
> intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
> + brw_render_cache_set_add_bo(brw, depth_irb->mt->region->bo);
> + }
> +
> + if (ctx->Extensions.ARB_stencil_texturing &&
> + stencil_irb && ctx->Stencil._WriteEnabled) {
> + brw_render_cache_set_add_bo(brw, stencil_irb->mt->region->bo);
> + }
> +
> + for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
> + struct intel_renderbuffer *irb =
> + intel_renderbuffer(fb->_ColorDrawBuffers[i]);
> +
> + if (irb)
> + brw_render_cache_set_add_bo(brw, irb->mt->region->bo);
> + }
> }
>
> /* May fail if out of video memory for texture or vbo upload, or on
> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
> index b984d47..c8fb6f3 100644
> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> @@ -552,6 +552,11 @@ brw_emit_depthbuffer(struct brw_context *brw)
> height = stencil_irb->Base.Base.Height;
> }
>
> + if (depth_mt)
> + brw_render_cache_set_check_flush(brw, depth_mt->region->bo);
> + if (stencil_mt)
> + brw_render_cache_set_check_flush(brw, stencil_mt->region->bo);
> +
> brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
> depthbuffer_format, depth_surface_type,
> stencil_mt, hiz, separate_stencil,
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 98759e2..fe95342 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -30,6 +30,7 @@
> #include "intel_reg.h"
> #include "intel_bufmgr.h"
> #include "intel_buffers.h"
> +#include "intel_fbo.h"
> #include "brw_context.h"
>
> static void
> @@ -88,6 +89,7 @@ intel_batchbuffer_reset(struct brw_context *brw)
> brw->batch.last_bo = brw->batch.bo;
>
> intel_batchbuffer_clear_cache(brw);
> + brw_render_cache_set_clear(brw);
>
> brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
> BATCH_SZ, 4096);
> @@ -696,6 +698,8 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
> }
> brw_emit_pipe_control_flush(brw, flags);
> }
> +
> + brw_render_cache_set_clear(brw);
> }
>
> void
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
> index d11cdb6..0f9abac 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -36,6 +36,8 @@
> #include "main/context.h"
> #include "main/teximage.h"
> #include "main/image.h"
> +#include "main/hash_table.h"
> +#include "main/set.h"
>
> #include "swrast/swrast.h"
> #include "drivers/common/meta.h"
> @@ -600,24 +602,6 @@ intel_render_texture(struct gl_context * ctx,
> }
>
>
> -/**
> - * Called by Mesa when rendering to a texture is done.
> - */
> -static void
> -intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb)
> -{
> - struct brw_context *brw = brw_context(ctx);
> -
> - DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format));
> -
> - /* Since we've (probably) rendered to the texture and will (likely) use
> - * it in the texture domain later on in this batchbuffer, flush the
> - * batch. Once again, we wish for a domain tracker in libdrm to cover
> - * usage inside of a batchbuffer like GEM does in the kernel.
> - */
> - intel_batchbuffer_emit_mi_flush(brw);
> -}
> -
> #define fbo_incomplete(fb, ...) do { \
> static GLuint msg_id = 0; \
> if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \
> @@ -953,6 +937,49 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
> intel_miptree_release(&new_mt);
> }
>
> +void
> +brw_render_cache_set_clear(struct brw_context *brw)
> +{
> + struct set_entry *entry;
> +
> + fprintf(stderr, "clear\n");
> +
> + set_foreach(brw->render_cache, entry) {
> + _mesa_set_remove(brw->render_cache, entry);
> + }
> +}
> +
> +void
> +brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo)
> +{
> + printf(" add bo %p\n", bo);
> + _mesa_set_add(brw->render_cache, _mesa_hash_pointer(bo), bo);
> +}
> +
> +/**
> + * Emits an appropriate flush for a BO if it has been rendered to within the
> + * same batchbuffer as a read that's about to be emitted.
> + *
> + * The GPU has separate, incoherent caches for the render cache and the
> + * sampler cache, along with other caches. Usually data in the different
> + * caches don't interact (e.g. we don't render to our driver-generated
> + * immediate constant data), but for render-to-texture in FBOs we definitely
> + * do. When a batchbuffer is flushed, the kernel will ensure that everything
> + * necessary is flushed before another use of that BO, but for reuse from
> + * different caches within a batchbuffer, it's all our responsibility.
> + */
> +void
> +brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo)
> +{
> + if (!_mesa_set_search(brw->render_cache, _mesa_hash_pointer(bo), bo)) {
> + printf("noflush bo %p\n", bo);
> + return;
> + }
> +
> + printf(" flush bo %p\n", bo);
> + intel_batchbuffer_emit_mi_flush(brw);
> +}
> +
> /**
> * Do one-time context initializations related to GL_EXT_framebuffer_object.
> * Hook in device driver functions.
> @@ -966,9 +993,10 @@ intel_fbo_init(struct brw_context *brw)
> dd->MapRenderbuffer = intel_map_renderbuffer;
> dd->UnmapRenderbuffer = intel_unmap_renderbuffer;
> dd->RenderTexture = intel_render_texture;
> - dd->FinishRenderTexture = intel_finish_render_texture;
> dd->ValidateFramebuffer = intel_validate_framebuffer;
> dd->BlitFramebuffer = intel_blit_framebuffer;
> dd->EGLImageTargetRenderbufferStorage =
> intel_image_target_renderbuffer_storage;
> +
> + brw->render_cache = _mesa_set_create(brw, _mesa_key_pointer_equal);
> }
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h
> index b8db7e2..fa457e2 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.h
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.h
> @@ -237,6 +237,10 @@ void
> intel_renderbuffer_upsample(struct brw_context *brw,
> struct intel_renderbuffer *irb);
>
> +void brw_render_cache_set_clear(struct brw_context *brw);
> +void brw_render_cache_set_add_bo(struct brw_context *brw, drm_intel_bo *bo);
> +void brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo);
> +
> unsigned
> intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
>
> diff --git a/src/mesa/main/set.c b/src/mesa/main/set.c
> index dc3550c..bbae906 100644
> --- a/src/mesa/main/set.c
> +++ b/src/mesa/main/set.c
> @@ -199,6 +199,8 @@ set_rehash(struct set *ht, int new_size_index)
> if (table == NULL)
> return;
>
> + fprintf(stderr, "rehash %d\n", hash_sizes[ht->size_index].max_entries);
> +
> old_ht = *ht;
>
> ht->table = table;
> @@ -237,11 +239,15 @@ _mesa_set_add(struct set *ht, uint32_t hash, const void *key)
> set_rehash(ht, ht->size_index);
> }
>
> + fprintf(stderr, "add %d %d\n", ht->entries, ht->deleted_entries);
> hash_address = hash % ht->size;
> do {
> struct set_entry *entry = ht->table + hash_address;
> uint32_t double_hash;
>
> + fprintf(stderr, "%d %d\n", entry_is_present(entry),
> + entry_is_deleted(entry));
> +
> if (!entry_is_present(entry)) {
> if (entry_is_deleted(entry))
> ht->deleted_entries--;
>
Assuming you drop the debug prints, this is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
We should probably also send it to 10.1.1. I can backport it (just
dropping the stencil hunk) once it lands.
--Ken
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140305/40c10935/attachment.pgp>
More information about the mesa-dev
mailing list