[Mesa-dev] [RFC] i965: Resolve color for all active shader images in intel_update_state().

Chris Wilson chris at chris-wilson.co.uk
Mon Sep 7 10:50:25 PDT 2015


On Mon, Sep 07, 2015 at 10:15:56AM -0700, Kenneth Graunke wrote:
> On Saturday, September 05, 2015 08:58:07 PM Chris Wilson wrote:
> > On Sat, Sep 05, 2015 at 11:30:44AM -0700, Jordan Justen wrote:
> > > From: Francisco Jerez <currojerez at riseup.net>
> > > 
> > > Fixes arb_shader_image_load_store/execution/load-from-cleared-image.shader_test
> > > 
> > > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > > Cc: Jason Ekstrand <jason.ekstrand at intel.com>
> > > Tested-by: Jordan Justen <jordan.l.justen at intel.com>
> > > ---
> > >  RE: i965: Perform an explicit flush after doing _mesa_meta_pbo_TexSubImage
> > > 
> > >  curro has some concerns about potential perf impact by this and
> > >  wanted it to be checked on small-core w/CPU bound apps.
> > >  Unfortunately, he is on vacation now.
> > 
> > One thing that may help (other than having a bitfield of valid
> > ImageUnits being more cache friendly) is that intel_update_state() need
> > only invalidate state. Moving the heavyweight resolves/flushes to just
> > prior to running the pipeline should help reduce the profile of the
> > fairly frequently called mesa_update_state().
> > -Chris
> 
>  _________________________________ 
> < welcome to recursive meta hell! >
>  --------------------------------- 
>       \                    / \  //\
>        \    |\___/|      /   \//  \\
>             /0  0  \__  /    //  | \ \    
>            /     /  \/_/    //   |  \  \  
>            @_^_@'/   \/_   //    |   \   \ 
>            //_^_/     \/_ //     |    \    \
>         ( //) |        \///      |     \     \
>       ( / /) _|_ /   )  //       |      \     _\
>     ( // /) '/,_ _ _/  ( ; -.    |    _ _\.-~        .-~~~^-.
>   (( / / )) ,-{        _      `-.|.-~-.           .~         `.
>  (( // / ))  '/\      /                 ~-. _ .-~      .-~^-.  \
>  (( /// ))      `.   {            }                   /      \  \
>   (( / ))     .----~-.\        \-'                 .~         \  `. \^-.
>              ///.----..>        \             _ -~             `.  ^-`  ^-_
>                ///-._ _ _ _ _ _ _}^ - - - - ~                     ~-- ,.-~
>                                                                   /.-~
> 
> That sounds reasonable, but fair warning: Kristian spent literally a month
> trying to figure out how to make this work, and we tried a wide variety of
> reasonable sounding plans that ended up falling apart.  Awkward resolve
> timings make for a world of nasty corner cases.

Solution I have is to add a new callback from vbo before vbo->draw_prims
to avoid the recursion. Looking at it, I am pretty sure the current code
will also recurse and fail miserable along some meta paths.

Haven't checked non-i965 for sanity, so wip:

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 78cd269..580131d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -160,37 +160,12 @@ static void
 intel_update_state(struct gl_context * ctx, GLuint new_state)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct intel_texture_object *tex_obj;
-   struct intel_renderbuffer *depth_irb;
 
    if (ctx->swrast_context)
       _swrast_InvalidateState(ctx, new_state);
    _vbo_InvalidateState(ctx, new_state);
 
    brw->NewGLState |= new_state;
-
-   _mesa_unlock_context_textures(ctx);
-
-   /* Resolve the depth buffer's HiZ buffer. */
-   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
-   if (depth_irb)
-      intel_renderbuffer_resolve_hiz(brw, depth_irb);
-
-   /* Resolve depth buffer and render cache of each enabled texture. */
-   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
-   for (int i = 0; i <= maxEnabledUnit; i++) {
-      if (!ctx->Texture.Unit[i]._Current)
-	 continue;
-      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
-      if (!tex_obj || !tex_obj->mt)
-	 continue;
-      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
-      intel_miptree_resolve_color(brw, tex_obj->mt);
-      if (brw_check_dirty(tex_obj->mt->bo))
-         brw->ctx.NewDriverState |= BRW_NEW_CACHE_FLUSH;
-   }
-
-   _mesa_lock_context_textures(ctx);
 }
 
 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 7a42403..bda251f 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -339,6 +339,38 @@ brw_merge_inputs(struct brw_context *brw,
    }
 }
 
+static void
+brw_draw_resolve(struct gl_context *ctx)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_texture_object *tex_obj;
+   struct intel_renderbuffer *depth_irb;
+
+   /* Resolve the depth buffer's HiZ buffer. */
+   depth_irb = intel_get_renderbuffer(brw->ctx.DrawBuffer, BUFFER_DEPTH);
+   if (depth_irb)
+      intel_renderbuffer_resolve_hiz(brw, depth_irb);
+
+   /* Resolve depth buffer and render cache of each enabled texture. */
+   int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit;
+   for (int i = 0; i <= maxEnabledUnit; i++) {
+      if (!brw->ctx.Texture.Unit[i]._Current)
+         continue;
+      tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current);
+      if (!tex_obj || !tex_obj->mt)
+         continue;
+      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
+      intel_miptree_resolve_color(brw, tex_obj->mt);
+      if (tex_obj->mt->bo->dirty)
+         brw->ctx.NewDriverState |= BRW_NEW_CACHE_FLUSH;
+   }
+}
+
+
 /**
  * \brief Call this after drawing to mark which buffers need resolving
  *
@@ -592,6 +624,7 @@ brw_draw_init(struct brw_context *brw)
 
    /* Register our drawing function:
     */
+   vbo->resolve = brw_draw_resolve;
    vbo->draw_prims = brw_draw_prims;
 
    for (int i = 0; i < VERT_ATTRIB_MAX; i++)
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index 2aaff5d..b64c468 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -89,6 +89,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx,
                              struct _glapi_table *exec);
 
 
+typedef void (*vbo_resolve_func)( struct gl_context *ctx);
 typedef void (*vbo_draw_func)( struct gl_context *ctx,
 			       const struct _mesa_prim *prims,
 			       GLuint nr_prims,
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index e3eb286..1f0b46a 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -148,11 +148,30 @@ static void init_mat_currval(struct gl_context *ctx)
 }
 
 
+static void nop_resolve(struct gl_context *ctx)
+{
+}
+
+static void nop_draw(struct gl_context *ctx,
+                     const struct _mesa_prim *prims,
+                     GLuint nr_prims,
+                     const struct _mesa_index_buffer *ib,
+                     GLboolean index_bounds_valid,
+                     GLuint min_index,
+                     GLuint max_index,
+                     struct gl_transform_feedback_object *tfb_vertcount,
+                     unsigned stream,
+                     struct gl_buffer_object *indirect)
+{
+}
+
 GLboolean _vbo_CreateContext( struct gl_context *ctx )
 {
    struct vbo_context *vbo = CALLOC_STRUCT(vbo_context);
 
    ctx->vbo_context = vbo;
+   vbo->draw_prims = nop_draw;
+   vbo->resolve = nop_resolve;
 
    /* Initialize the arrayelt helper
     */
diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
index a376efe..c4033ee4 100644
--- a/src/mesa/vbo/vbo_context.h
+++ b/src/mesa/vbo/vbo_context.h
@@ -75,6 +75,7 @@ struct vbo_context {
    /* Callback into the driver.  This must always succeed, the driver
     * is responsible for initiating any fallback actions required:
     */
+   vbo_resolve_func resolve;
    vbo_draw_func draw_prims;
 };
 
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 34d2c1d..d592ae4 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -549,6 +549,7 @@ vbo_bind_arrays(struct gl_context *ctx)
    struct vbo_context *vbo = vbo_context(ctx);
    struct vbo_exec_context *exec = &vbo->exec;
 
+   vbo->resolve(ctx);
    vbo_draw_method(vbo, DRAW_ARRAYS);
 
    if (exec->array.recalculate_inputs) {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 2bfb0c3..dcbb9a5 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -388,11 +388,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
 
       if (exec->vtx.copied.nr != exec->vtx.vert_count) {
 	 struct gl_context *ctx = exec->ctx;
+
+         vbo_context(ctx)->resolve(ctx);
 	 
 	 /* Before the update_state() as this may raise _NEW_VARYING_VP_INPUTS
           * from _mesa_set_varying_vp_inputs().
 	  */
-	 vbo_exec_bind_arrays( ctx );
+         vbo_draw_method(vbo_context(ctx), DRAW_BEGIN_END);
+         vbo_exec_bind_arrays( ctx );
 
          if (ctx->NewState)
             _mesa_update_state( ctx );
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index b1fd689..2103b8e 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -297,6 +297,8 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
          return;
       }
 
+      vbo_context(ctx)->resolve(ctx);
+
       vbo_bind_vertex_list( ctx, node );
 
       vbo_draw_method(vbo_context(ctx), DRAW_DISPLAY_LIST);
-Chris


-- 
Chris Wilson, Intel Open Source Technology Centre


More information about the mesa-dev mailing list