[Mesa-dev] [PATCH 02/20] i965: Enable gather push constants

Francisco Jerez currojerez at riseup.net
Fri Oct 23 07:27:32 PDT 2015


Abdiel Janulgue <abdiel.janulgue at linux.intel.com> writes:

> The 3DSTATE_GATHER_POOL_ALLOC is used to enable or disable the gather
> push constants feature within a context. This patch provides the toggle
> functionality of using gather push constants to program constant data
> within a batch.
>
> Using gather push constants require that a gather pool be allocated so
> that the resource streamer can flush the packed constants it gathered.
> The pool is later referenced by the 3DSTATE_CONSTANT_* command to
> program the push constant data.
>
> Also introduce INTEL_UBO_GATHER to selectively enable which shader stage
> uses gather constants for ubo fetches.
>
> v2: GEN8 support.
>
> Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_binding_tables.c | 59 ++++++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_context.c        | 39 ++++++++++++++++-
>  src/mesa/drivers/dri/i965/brw_context.h        | 10 +++++
>  src/mesa/drivers/dri/i965/brw_state.h          |  1 +
>  4 files changed, 108 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> index 508f1f0..aa16b6d 100644
> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> @@ -276,6 +276,60 @@ gen7_update_binding_table_from_array(struct brw_context *brw,
>     ADVANCE_BATCH();
>  }
>  
> +static void
> +gen7_init_gather_pool(struct brw_context *brw)
> +{
> +   if (!brw->use_resource_streamer ||
> +       (!brw->vs_ubo_gather && !brw->gs_ubo_gather && !brw->fs_ubo_gather))
> +      return;
> +
> +   if (!brw->gather_pool.bo) {
> +      brw->gather_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "gather_pool",
> +                                               brw->gather_pool.size, 4096);
> +      brw->gather_pool.next_offset = 0;
> +   }
> +}
> +
> +void
> +gen7_toggle_gather_constants(struct brw_context *brw, bool enable)
> +{
> +   if (!brw->use_resource_streamer ||
> +       (enable && !(brw->vs_ubo_gather || brw->gs_ubo_gather ||
> +                    brw->fs_ubo_gather)))
> +      return;
> +

You should probably 'assert(brw->is_haswell || brw->gen == 8)' here.
And instead of having a separate gen7_init_gather_pool() function that
the caller needs to remember to call before this in order to avoid a
segfault below, you could just check the value of brw->gather_pool.bo
here and in case it's zero allocate a new buffer object to back the
pool.

> +   uint32_t dw1 = enable ? BRW_GATHER_CONSTANTS_ENABLE : 0;
> +   if (brw->is_haswell) {
> +      dw1 |= HSW_GATHER_CONSTANTS_RESERVED | (enable ? GEN7_MOCS_L3 : 0);
> +   } else if (brw->gen == 8) {
> +      dw1 |= (enable ? BDW_MOCS_WB : 0);
> +   }

How about:

| const unsigned mocs = (brw->gen == 8 ? BDW_MOCS_WB : GEN7_MOCS_L3);
| const unsigned dw1 = (enable ? BRW_GATHER_CONSTANTS_ENABLE | mocs) |
|                      (brw->is_haswell ? HSW_GATHER_CONSTANTS_RESERVED : 0);

> +   int pkt_len = brw->gen >= 8 ? 4 : 3;

Could be const.

> +
> +   BEGIN_BATCH(pkt_len);
> +   OUT_BATCH(_3DSTATE_GATHER_POOL_ALLOC << 16 | (pkt_len - 2));
> +   if (brw->gen >=8 ) {

Missing whitespace before the 8 and extra whitespace after it.

> +      if (enable) {
> +         OUT_RELOC64(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
> +         OUT_BATCH(brw->gather_pool.bo->size);
> +      } else {
> +         OUT_BATCH(dw1);
> +         OUT_BATCH(0);
> +         OUT_BATCH(0);
> +      }
> +   } else {
> +      if (enable) {
> +         OUT_RELOC(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
> +         OUT_RELOC(brw->gather_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> +                   brw->gather_pool.bo->size);
> +      } else {
> +         OUT_BATCH(dw1);
> +         OUT_BATCH(0);
> +      }
> +   }
> +   ADVANCE_BATCH();
> +}
> +
>  /**
>   * Disable hardware binding table support, falling back to the
>   * older software-generated binding table mechanism.
> @@ -294,6 +348,7 @@ gen7_disable_hw_binding_tables(struct brw_context *brw)
>     brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>  
>     int pkt_len = brw->gen >= 8 ? 4 : 3;
> +   gen7_toggle_gather_constants(brw, false);
>  
>     BEGIN_BATCH(pkt_len);
>     OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
> @@ -360,12 +415,16 @@ gen7_enable_hw_binding_tables(struct brw_context *brw)
>               brw->hw_bt_pool.bo->size);
>     }
>     ADVANCE_BATCH();
> +
> +   gen7_init_gather_pool(brw);
> +   gen7_toggle_gather_constants(brw, true);

Seems rather unexpected for gen7_enable_hw_binding_tables() to enable
gather constants as a side effect.

>  }
>  
>  void
>  gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
>  {
>     brw->hw_bt_pool.next_offset = 0;
> +   brw->gather_pool.next_offset = 0;

IIUC gen7_reset_hw_bt_pool_offsets() will be called whenever we run out
of space in the HW binding table pool.  Why should that cause the gather
constant pool offset to be reset?

>  }
>  
>  const struct brw_tracked_state gen7_hw_binding_tables = {
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 7c1c133..8079465 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -68,6 +68,7 @@
>  #include "tnl/tnl.h"
>  #include "tnl/t_pipeline.h"
>  #include "util/ralloc.h"
> +#include "util/u_atomic.h"
>  
>  /***************************************
>   * Mesa's Driver Functions
> @@ -687,6 +688,26 @@ brw_process_driconf_options(struct brw_context *brw)
>        driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
>  }
>  
> +static void
> +brw_process_intel_gather_variable(struct brw_context *brw)
> +{
> +   uint64_t INTEL_UBO_GATHER = 0;
> +
> +   static const struct dri_debug_control gather_control[] = {
> +      { "vs", 1 << MESA_SHADER_VERTEX },
> +      { "gs", 1 << MESA_SHADER_GEOMETRY },
> +      { "fs", 1 << MESA_SHADER_FRAGMENT },
> +      { NULL, 0 }
> +   };
> +   uint64_t intel_ubo_gather = driParseDebugString(getenv("INTEL_UBO_GATHER"),
> +                                                   gather_control);
> +   (void) p_atomic_cmpxchg(&INTEL_UBO_GATHER, 0, intel_ubo_gather);
> +
As we discussed offline this cmpxchg is redundant because the target
variable is local anyway.

> +   brw->vs_ubo_gather = INTEL_UBO_GATHER & (1 << MESA_SHADER_VERTEX);
> +   brw->gs_ubo_gather = INTEL_UBO_GATHER & (1 << MESA_SHADER_GEOMETRY);
> +   brw->fs_ubo_gather = INTEL_UBO_GATHER & (1 << MESA_SHADER_FRAGMENT);
> +}
> +
IIUC this will cause the UBO gather feature to be disabled by default
unless overridden by INTEL_UBO_GATHER environment variable.  Why can't
we just enable it unconditionally?  Is it because you've seen
regressions with this feature enabled?

>  GLboolean
>  brwCreateContext(gl_api api,
>  	         const struct gl_config *mesaVis,
> @@ -750,6 +771,8 @@ brwCreateContext(gl_api api,
>     brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
>     brw->has_swizzling = screen->hw_has_swizzling;
>  
> +   brw_process_intel_gather_variable(brw);
> +
>     brw->vs.base.stage = MESA_SHADER_VERTEX;
>     brw->gs.base.stage = MESA_SHADER_GEOMETRY;
>     brw->wm.base.stage = MESA_SHADER_FRAGMENT;
> @@ -894,7 +917,8 @@ brwCreateContext(gl_api api,
>  
>     brw->use_resource_streamer = screen->has_resource_streamer &&
>        (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
> -       brw_env_var_as_boolean("INTEL_USE_GATHER", false));
> +       brw_env_var_as_boolean("INTEL_USE_GATHER", false) ||

The INTEL_USE_GATHER environment variable doesn't seem to do anything
useful right now.  What's the purpose?

> +       brw->vs_ubo_gather || brw->gs_ubo_gather || brw->fs_ubo_gather);
>  
>     ctx->VertexProgram._MaintainTnlProgram = true;
>     ctx->FragmentProgram._MaintainTexEnvProgram = true;
> @@ -909,6 +933,16 @@ brwCreateContext(gl_api api,
>     if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
>        ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
>  
> +   /* The gather pool is required by the hardware to represent all combined
> +    * constants gathered from various sources where it is then fed into the push
> +    * constant state. There is no fixed amount how many constant
> +    * states are uploaded per batch, so use the combined per-stage limits instead.
> +    */
> +   brw->gather_pool.size =
> +      brw->ctx.Const.Program[MESA_SHADER_VERTEX].MaxCombinedUniformComponents +
> +      brw->ctx.Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents +
> +      brw->ctx.Const.Program[MESA_SHADER_FRAGMENT].MaxCombinedUniformComponents;
> +

This doesn't look right, MaxCombinedUniformComponents is in units of
32-bit components while gather_pool.size is in bytes.  And also I guess
it wouldn't hurt to replace this with a loop up to MESA_SHADER_STAGES so
you don't need to come back and change it when tessellation and compute
shaders are supported.

>     if (INTEL_DEBUG & DEBUG_SHADER_TIME)
>        brw_init_shader_time(brw);
>  
> @@ -963,7 +997,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
>        drm_intel_bo_unreference(brw->wm.base.scratch_bo);
>  
>     gen7_reset_hw_bt_pool_offsets(brw);
> +   drm_intel_bo_unreference(brw->gather_pool.bo);
>     drm_intel_bo_unreference(brw->hw_bt_pool.bo);
> +
> +   brw->gather_pool.bo = NULL;
>     brw->hw_bt_pool.bo = NULL;
>  
>     drm_intel_gem_context_destroy(brw->hw_ctx);
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 41ba769..33c49b7 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1192,6 +1192,9 @@ struct brw_context
>     bool no_simd8;
>     bool use_rep_send;
>     bool use_resource_streamer;
> +   bool vs_ubo_gather;
> +   bool gs_ubo_gather;
> +   bool fs_ubo_gather;
>  
>     /**
>      * Some versions of Gen hardware don't do centroid interpolation correctly
> @@ -1461,6 +1464,13 @@ struct brw_context
>        uint32_t next_offset;
>     } hw_bt_pool;
>  
> +   /* Internal storage used by the RS to flush and refer to constant data */
> +   struct {
> +      drm_intel_bo *bo;
> +      uint32_t next_offset;
> +      uint32_t size;
> +   } gather_pool;
> +
>     struct {
>        uint32_t state_offset;
>        uint32_t blend_state_offset;
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index afce8ad..c7c7e0b 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -375,6 +375,7 @@ void gen7_update_binding_table_from_array(struct brw_context *brw,
>                                            int num_surfaces);
>  void gen7_disable_hw_binding_tables(struct brw_context *brw);
>  void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
> +void gen7_toggle_gather_constants(struct brw_context *brw, bool enable);
>  
>  #ifdef __cplusplus
>  }
> -- 
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20151023/d4ec70d4/attachment.sig>


More information about the mesa-dev mailing list