[Mesa-dev] [PATCH] i965: Add an option to not generate the SIMD8 fragment shader

Kenneth Graunke kenneth at whitecape.org
Mon Jul 14 09:17:20 PDT 2014


On Friday, July 11, 2014 11:26:30 AM Kristian Høgsberg wrote:
> For now, this can only be triggered with a new 'no8' INTEL_DEBUG option
> 
> Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  2 ++
>  src/mesa/drivers/dri/i965/brw_fs.cpp      | 14 ++++++++++++--
>  src/mesa/drivers/dri/i965/gen7_wm_state.c |  4 ++--
>  src/mesa/drivers/dri/i965/gen8_ps_state.c |  4 ++--
>  src/mesa/drivers/dri/i965/intel_debug.c   |  1 +
>  src/mesa/drivers/dri/i965/intel_debug.h   |  1 +
>  6 files changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
> index 2943a20..11cea04 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -340,6 +340,7 @@ struct brw_wm_prog_data {
>        /** @} */
>     } binding_table;
>  
> +   bool no_8;
>     bool dual_src_blend;
>     bool uses_pos_offset;
>     bool uses_omask;
> @@ -1039,6 +1040,7 @@ struct brw_context
>     bool has_compr4;
>     bool has_negative_rhw_bug;
>     bool has_pln;
> +   bool no_simd8;
>  
>     /**
>      * Some versions of Gen hardware don't do centroid interpolation 
correctly
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index a3ad375..f018d94 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -3258,15 +3258,25 @@ brw_wm_fs_emit(struct brw_context *brw,
>        }
>     }
>  
> +   exec_list *simd8_instructions;
> +   int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
> +   if (no_simd8 && simd16_instructions) {
> +      simd8_instructions = NULL;
> +      prog_data->no_8 = true;
> +   } else {
> +      simd8_instructions = &v.instructions;
> +      prog_data->no_8 = false;
> +   }
> +
>     const unsigned *assembly = NULL;
>     if (brw->gen >= 8) {
>        gen8_fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, 
v.do_dual_src);
> -      assembly = g.generate_assembly(&v.instructions, simd16_instructions,
> +      assembly = g.generate_assembly(simd8_instructions, 
simd16_instructions,
>                                       final_assembly_size);
>     } else {
>        fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src,
>                       v.runtime_check_aads_emit, INTEL_DEBUG & DEBUG_WM);
> -      assembly = g.generate_assembly(&v.instructions, simd16_instructions,
> +      assembly = g.generate_assembly(simd8_instructions, 
simd16_instructions,
>                                       final_assembly_size);
>     }
>  
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> index c03d19d..c3e9316 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
> @@ -223,9 +223,9 @@ upload_ps_state(struct brw_context *brw)
>        _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, 
false);
>     assert(min_inv_per_frag >= 1);
>  
> -   if (brw->wm.prog_data->prog_offset_16) {
> +   if (brw->wm.prog_data->prog_offset_16 || brw->wm.prog_data->no_8) {
>        dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
> -      if (min_inv_per_frag == 1) {
> +      if (!brw->wm.prog_data->no_8 && min_inv_per_frag == 1) {
>           dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
>           dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
> diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> index f58d49c..49d4fe0 100644
> --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
> @@ -195,9 +195,9 @@ upload_ps_state(struct brw_context *brw)
>        _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, 
false);
>     assert(min_invocations_per_fragment >= 1);
>  
> -   if (brw->wm.prog_data->prog_offset_16) {
> +   if (brw->wm.prog_data->prog_offset_16 || brw->wm.prog_data->no_8) {
>        dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
> -      if (min_invocations_per_fragment == 1) {
> +      if (!brw->wm.prog_data->no_8 && min_invocations_per_fragment == 1) {
>           dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
>           dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
> diff --git a/src/mesa/drivers/dri/i965/intel_debug.c 
b/src/mesa/drivers/dri/i965/intel_debug.c
> index c72fce2..1fb8b6c 100644
> --- a/src/mesa/drivers/dri/i965/intel_debug.c
> +++ b/src/mesa/drivers/dri/i965/intel_debug.c
> @@ -66,6 +66,7 @@ static const struct dri_debug_control debug_control[] = {
>     { "nodualobj", DEBUG_NO_DUAL_OBJECT_GS },
>     { "optimizer", DEBUG_OPTIMIZER },
>     { "noann", DEBUG_NO_ANNOTATION },
> +   { "no8",  DEBUG_NO8 },
>     { NULL,    0 }
>  };
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_debug.h 
b/src/mesa/drivers/dri/i965/intel_debug.h
> index 37dc34a..8e1c299 100644
> --- a/src/mesa/drivers/dri/i965/intel_debug.h
> +++ b/src/mesa/drivers/dri/i965/intel_debug.h
> @@ -62,6 +62,7 @@ extern uint64_t INTEL_DEBUG;
>  #define DEBUG_NO_DUAL_OBJECT_GS 0x80000000
>  #define DEBUG_OPTIMIZER   0x100000000
>  #define DEBUG_NO_ANNOTATION 0x200000000
> +#define DEBUG_NO8        0x40000000
>  
>  #ifdef HAVE_ANDROID_PLATFORM
>  #define LOG_TAG "INTEL-MESA"
> 

I like how you made the flags control whether we compile a SIMD8 shader, and 
then made the state upload code just upload whatever programs are there.

It seems like the state upload code could get refactored a little bit:
- Figure out which programs exist (8/16/32...?); assign them to KSP 0/1/2
- Upload the offsets/starting registers for KSP 0/1/2
Might be a nice follow-up patch anyway.  In fact, I think you wrote something 
along those lines - maybe it was in your branch?

I'd also like to see a patch to allow register spilling in SIMD16 mode if 
INTEL_DEBUG=no8.  Everything should be in place, you just need to whack 
brw_fs.cpp's dispatch_width == 16 check in the !allocated_without_spills 
block.

But I think those could be done in other patches.  I'm fairly ambivalent to 
whether you add brw->no_simd8 here or in another patch.  Perhaps make it brw-
>wm.no_simd8, though?

With or without those suggestions,
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140714/1bb0ee7e/attachment.sig>


More information about the mesa-dev mailing list