[Mesa-dev] [PATCH 02/12] i965: Add an option to not generate the SIMD8 fragment shader

Kristian Høgsberg hoegsberg at gmail.com
Mon Aug 11 22:35:18 PDT 2014


On Mon, Aug 11, 2014 at 08:08:33PM -0700, Ben Widawsky wrote:
> On Mon, Aug 11, 2014 at 05:29:32PM -0700, Kristian Høgsberg wrote:
> > For now, this can only be triggered with a new 'no8' INTEL_DEBUG option
> > and a new context flag.  We'll use the context flag later, but introducing
> > it now lets us bisect to this commit if it breaks something.
> > 
> > Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
> > ---
> >  src/mesa/drivers/dri/i965/brw_context.h   |  2 ++
> >  src/mesa/drivers/dri/i965/brw_fs.cpp      | 14 ++++++++++++--
> >  src/mesa/drivers/dri/i965/gen7_wm_state.c |  4 ++--
> >  src/mesa/drivers/dri/i965/gen8_ps_state.c |  4 ++--
> >  src/mesa/drivers/dri/i965/intel_debug.c   |  1 +
> >  src/mesa/drivers/dri/i965/intel_debug.h   |  1 +
> >  6 files changed, 20 insertions(+), 6 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> > index 1bbcf46..ab7d858 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.h
> > +++ b/src/mesa/drivers/dri/i965/brw_context.h
> > @@ -340,6 +340,7 @@ struct brw_wm_prog_data {
> >        /** @} */
> >     } binding_table;
> >  
> > +   bool no_8;
> >     bool dual_src_blend;
> >     bool uses_pos_offset;
> >     bool uses_omask;
> > @@ -1031,6 +1032,7 @@ struct brw_context
> >     bool has_compr4;
> >     bool has_negative_rhw_bug;
> >     bool has_pln;
> > +   bool no_simd8;
> >  
> >     /**
> >      * Some versions of Gen hardware don't do centroid interpolation correctly
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index bf95b57..061c32d 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -3230,15 +3230,25 @@ brw_wm_fs_emit(struct brw_context *brw,
> >        }
> >     }
> >  
> > +   exec_list *simd8_instructions;
> > +   int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
> > +   if (no_simd8 && simd16_instructions) {
> > +      simd8_instructions = NULL;
> > +      prog_data->no_8 = true;
> > +   } else {
> > +      simd8_instructions = &v.instructions;
> > +      prog_data->no_8 = false;
> > +   }
> > +
> 
> Was the fallback to simd8 when !simd16_instructions (and DEBUG_NO8)
> intentional? I have no suggestions for what to do in this case, other
> than possibly assert. I was just curious.

Yes, the idea is that for the debug option it's better to fall back to
SIMD8 and work correctly than to fail because we can't to SIMD16.  Of
course, for the use in fast clear, we absolutely need the SIMD16
shader or we'll hang the GPU.  Perhaps we need to assert that we get
SIMD16 in the fast clear code.

> >     const unsigned *assembly = NULL;
> >     if (brw->gen >= 8) {
> >        gen8_fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src);
> > -      assembly = g.generate_assembly(&v.instructions, simd16_instructions,
> > +      assembly = g.generate_assembly(simd8_instructions, simd16_instructions,
> >                                       final_assembly_size);
> >     } else {
> >        fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src,
> >                       v.runtime_check_aads_emit, INTEL_DEBUG & DEBUG_WM);
> > -      assembly = g.generate_assembly(&v.instructions, simd16_instructions,
> > +      assembly = g.generate_assembly(simd8_instructions, simd16_instructions,
> >                                       final_assembly_size);
> >     }
> 
> Suppose you could combine the two.

I don't think so? g is a different type in the two branches.

> >  
> > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> > index c03d19d..c3e9316 100644
> > --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> > @@ -223,9 +223,9 @@ upload_ps_state(struct brw_context *brw)
> >        _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
> >     assert(min_inv_per_frag >= 1);
> >  
> > -   if (brw->wm.prog_data->prog_offset_16) {
> > +   if (brw->wm.prog_data->prog_offset_16 || brw->wm.prog_data->no_8) {
> >        dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
> > -      if (min_inv_per_frag == 1) {
> > +      if (!brw->wm.prog_data->no_8 && min_inv_per_frag == 1) {
> >           dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
> >           dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
> >                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
> 
> This follows on from the question 2 hunks above, and it looks wrong. If
> you don't have 16 wide, but do have no_8, you're setting
> GEN7_PS_16_DISPATCH_ENABLE. Probably I've missed something.
> 
> > diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > index f58d49c..49d4fe0 100644
> > --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> > @@ -195,9 +195,9 @@ upload_ps_state(struct brw_context *brw)
> >        _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
> >     assert(min_invocations_per_fragment >= 1);
> >  
> > -   if (brw->wm.prog_data->prog_offset_16) {
> > +   if (brw->wm.prog_data->prog_offset_16 || brw->wm.prog_data->no_8) {
> >        dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
> > -      if (min_invocations_per_fragment == 1) {
> > +      if (!brw->wm.prog_data->no_8 && min_invocations_per_fragment == 1) {
> >           dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
> >           dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
> >                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
> > diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
> > index c72fce2..1fb8b6c 100644
> > --- a/src/mesa/drivers/dri/i965/intel_debug.c
> > +++ b/src/mesa/drivers/dri/i965/intel_debug.c
> > @@ -66,6 +66,7 @@ static const struct dri_debug_control debug_control[] = {
> >     { "nodualobj", DEBUG_NO_DUAL_OBJECT_GS },
> >     { "optimizer", DEBUG_OPTIMIZER },
> >     { "noann", DEBUG_NO_ANNOTATION },
> > +   { "no8",  DEBUG_NO8 },
> >     { NULL,    0 }
> >  };
> >  
> > diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
> > index 37dc34a..8e1c299 100644
> > --- a/src/mesa/drivers/dri/i965/intel_debug.h
> > +++ b/src/mesa/drivers/dri/i965/intel_debug.h
> > @@ -62,6 +62,7 @@ extern uint64_t INTEL_DEBUG;
> >  #define DEBUG_NO_DUAL_OBJECT_GS 0x80000000
> >  #define DEBUG_OPTIMIZER   0x100000000
> >  #define DEBUG_NO_ANNOTATION 0x200000000
> > +#define DEBUG_NO8        0x40000000
> >  
> >  #ifdef HAVE_ANDROID_PLATFORM
> >  #define LOG_TAG "INTEL-MESA"
> > -- 
> > 2.0.0
> > 
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> -- 
> Ben Widawsky, Intel Open Source Technology Center


More information about the mesa-dev mailing list