[Mesa-dev] [PATCH 17/18] i965/ps/gen8: Refactor state uploading

Kenneth Graunke kenneth at whitecape.org
Wed Apr 29 10:23:21 PDT 2015


On Wednesday, April 29, 2015 07:47:26 PM Pohjolainen, Topi wrote:
> On Thu, Apr 23, 2015 at 09:58:22PM +0300, Pohjolainen, Topi wrote:
> > On Thu, Apr 23, 2015 at 11:53:49AM -0700, Matt Turner wrote:
> > > On Wed, Apr 22, 2015 at 1:47 PM, Topi Pohjolainen
> > > <topi.pohjolainen at intel.com> wrote:
> > > > Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> > > > ---
> > > >  src/mesa/drivers/dri/i965/brw_state.h     | 12 +++++
> > > >  src/mesa/drivers/dri/i965/gen8_ps_state.c | 74 ++++++++++++++++++++-----------
> > > >  2 files changed, 59 insertions(+), 27 deletions(-)
> > > >
> > > > diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> > > > index 178f039..0c4f65e 100644
> > > > --- a/src/mesa/drivers/dri/i965/brw_state.h
> > > > +++ b/src/mesa/drivers/dri/i965/brw_state.h
> > > > @@ -265,6 +265,18 @@ void gen7_set_surface_mcs_info(struct brw_context *brw,
> > > >  void gen7_check_surface_setup(uint32_t *surf, bool is_render_target);
> > > >  void gen7_init_vtable_surface_functions(struct brw_context *brw);
> > > >
> > > > +/* gen8_ps_state.c */
> > > > +void gen8_upload_ps_state(struct brw_context *brw,
> > > > +                          const struct gl_fragment_program *fp,
> > > > +                          const struct brw_stage_state *stage_state,
> > > > +                          const struct brw_wm_prog_data *prog_data,
> > > > +                          uint32_t fast_clear_op);
> > > > +
> > > > +void gen8_upload_ps_extra(struct brw_context *brw,
> > > > +                          const struct gl_fragment_program *fp,
> > > > +                          const struct brw_wm_prog_data *prog_data,
> > > > +                          bool multisampled_fbo);
> > > > +
> > > >  /* gen7_sol_state.c */
> > > >  void gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
> > > >                                        const struct brw_vue_map *vue_map);
> > > > diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > > > index 5f39e12..da6136b 100644
> > > > --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > > > +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > > > @@ -27,15 +27,13 @@
> > > >  #include "brw_defines.h"
> > > >  #include "intel_batchbuffer.h"
> > > >
> > > > -static void
> > > > -upload_ps_extra(struct brw_context *brw)
> > > > +void
> > > > +gen8_upload_ps_extra(struct brw_context *brw,
> > > > +                     const struct gl_fragment_program *fp,
> > > > +                     const struct brw_wm_prog_data *prog_data,
> > > > +                     bool multisampled_fbo)
> > > >  {
> > > >     struct gl_context *ctx = &brw->ctx;
> > > > -   /* BRW_NEW_FRAGMENT_PROGRAM */
> > > > -   const struct brw_fragment_program *fp =
> > > > -      brw_fragment_program_const(brw->fragment_program);
> > > > -   /* BRW_NEW_FS_PROG_DATA */
> > > > -   const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
> > > >     uint32_t dw1 = 0;
> > > >
> > > >     dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
> > > > @@ -47,16 +45,14 @@ upload_ps_extra(struct brw_context *brw)
> > > >     if (prog_data->num_varying_inputs != 0)
> > > >        dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
> > > >
> > > > -   if (fp->program.Base.InputsRead & VARYING_BIT_POS)
> > > > +   if (fp->Base.InputsRead & VARYING_BIT_POS)
> > > >        dw1 |= GEN8_PSX_USES_SOURCE_DEPTH | GEN8_PSX_USES_SOURCE_W;
> > > >
> > > > -   /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */
> > > > -   bool multisampled_fbo = brw->num_samples > 1;
> > > >     if (multisampled_fbo &&
> > > > -       _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1)
> > > > +       _mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1)
> > > >        dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
> > > >
> > > > -   if (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
> > > > +   if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
> > > >        dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
> > > >
> > > >     if (prog_data->uses_omask)
> > > > @@ -68,6 +64,20 @@ upload_ps_extra(struct brw_context *brw)
> > > >     ADVANCE_BATCH();
> > > >  }
> > > >
> > > > +static void
> > > > +upload_ps_extra(struct brw_context *brw)
> > > > +{
> > > > +   /* BRW_NEW_FRAGMENT_PROGRAM */
> > > > +   const struct brw_fragment_program *fp =
> > > > +      brw_fragment_program_const(brw->fragment_program);
> > > > +   /* BRW_NEW_FS_PROG_DATA */
> > > > +   const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
> > > > +   /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */
> > > > +   const bool multisampled_fbo = brw->num_samples > 1;
> > > > +
> > > > +   gen8_upload_ps_extra(brw, &fp->program, prog_data, multisampled_fbo);
> > > > +}
> > > > +
> > > >  const struct brw_tracked_state gen8_ps_extra = {
> > > >     .dirty = {
> > > >        .mesa  = _NEW_MULTISAMPLE,
> > > > @@ -118,23 +128,24 @@ const struct brw_tracked_state gen8_wm_state = {
> > > >     .emit = upload_wm_state,
> > > >  };
> > > >
> > > > -static void
> > > > -upload_ps_state(struct brw_context *brw)
> > > > +void
> > > > +gen8_upload_ps_state(struct brw_context *brw,
> > > > +                     const struct gl_fragment_program *fp,
> > > > +                     const struct brw_stage_state *stage_state,
> > > > +                     const struct brw_wm_prog_data *prog_data,
> > > > +                     uint32_t fast_clear_op)
> > > >  {
> > > >     struct gl_context *ctx = &brw->ctx;
> > > >     uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
> > > >
> > > > -   /* BRW_NEW_FS_PROG_DATA */
> > > > -   const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
> > > > -
> > > >     /* Initialize the execution mask with VMask.  Otherwise, derivatives are
> > > >      * incorrect for subspans where some of the pixels are unlit.  We believe
> > > >      * the bit just didn't take effect in previous generations.
> > > >      */
> > > >     dw3 |= GEN7_PS_VECTOR_MASK_ENABLE;
> > > >
> > > > -   dw3 |=
> > > > -      (ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
> > > > +   dw3 |= (ALIGN(stage_state->sampler_count, 4) / 4) <<
> > > > +             GEN7_PS_SAMPLER_COUNT_SHIFT;
> > > 
> > > Indentation of this line looks a little bit odd. Probably remove a
> > > couple of spaces before GEN7_PS_SAMPLER_COUNT_SHIFT.
> > 
> > Good catch, I can also start using SET_FIELD(), thanks!
> 
> Only that we now have regression in piglit tests:
> 
> max-samplers: ../../../../../../src/mesa/drivers/dri/i965/gen8_ps_state.c:148: gen8_upload_ps_state: Assertion `(fieldval & ~ (((1<<((29)-(27)+1))-1)<<(27))) == 0' failed.
> 
> Original just tries to use a value that doesn't fit, but the assertion
> provided by SET_FIELD() now catches this.

Whoops, that's my bug :)  3DSTATE_PS only allows prefetching up to 16
samplers - I should have fixed this when expanding to 32 samplers on
Haswell+.  The good news is, we're just trashing "reserved" bits in
the field, not other fields.

Topi and I chatted about this on IRC, and I suggested simply clamping
sampler_count to 16 (since it's just for prefetching anyway).  He's
planning to send a patch for that.

--Ken
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150429/f2042172/attachment.sig>


More information about the mesa-dev mailing list