[Mesa-dev] [PATCH] i965: Add an option to not generate the SIMD8 fragment shader

Kristian Høgsberg krh at bitplanet.net
Mon Jul 14 10:27:06 PDT 2014


On Mon, Jul 14, 2014 at 9:17 AM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On Friday, July 11, 2014 11:26:30 AM Kristian Høgsberg wrote:
>> For now, this can only be triggered with a new 'no8' INTEL_DEBUG option
>>
>> Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.h   |  2 ++
>>  src/mesa/drivers/dri/i965/brw_fs.cpp      | 14 ++++++++++++--
>>  src/mesa/drivers/dri/i965/gen7_wm_state.c |  4 ++--
>>  src/mesa/drivers/dri/i965/gen8_ps_state.c |  4 ++--
>>  src/mesa/drivers/dri/i965/intel_debug.c   |  1 +
>>  src/mesa/drivers/dri/i965/intel_debug.h   |  1 +
>>  6 files changed, 20 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
>> index 2943a20..11cea04 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -340,6 +340,7 @@ struct brw_wm_prog_data {
>>        /** @} */
>>     } binding_table;
>>
>> +   bool no_8;
>>     bool dual_src_blend;
>>     bool uses_pos_offset;
>>     bool uses_omask;
>> @@ -1039,6 +1040,7 @@ struct brw_context
>>     bool has_compr4;
>>     bool has_negative_rhw_bug;
>>     bool has_pln;
>> +   bool no_simd8;
>>
>>     /**
>>      * Some versions of Gen hardware don't do centroid interpolation
> correctly
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index a3ad375..f018d94 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -3258,15 +3258,25 @@ brw_wm_fs_emit(struct brw_context *brw,
>>        }
>>     }
>>
>> +   exec_list *simd8_instructions;
>> +   int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
>> +   if (no_simd8 && simd16_instructions) {
>> +      simd8_instructions = NULL;
>> +      prog_data->no_8 = true;
>> +   } else {
>> +      simd8_instructions = &v.instructions;
>> +      prog_data->no_8 = false;
>> +   }
>> +
>>     const unsigned *assembly = NULL;
>>     if (brw->gen >= 8) {
>>        gen8_fs_generator g(brw, mem_ctx, key, prog_data, prog, fp,
> v.do_dual_src);
>> -      assembly = g.generate_assembly(&v.instructions, simd16_instructions,
>> +      assembly = g.generate_assembly(simd8_instructions,
> simd16_instructions,
>>                                       final_assembly_size);
>>     } else {
>>        fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src,
>>                       v.runtime_check_aads_emit, INTEL_DEBUG & DEBUG_WM);
>> -      assembly = g.generate_assembly(&v.instructions, simd16_instructions,
>> +      assembly = g.generate_assembly(simd8_instructions,
> simd16_instructions,
>>                                       final_assembly_size);
>>     }
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c
> b/src/mesa/drivers/dri/i965/gen7_wm_state.c
>> index c03d19d..c3e9316 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
>> @@ -223,9 +223,9 @@ upload_ps_state(struct brw_context *brw)
>>        _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program,
> false);
>>     assert(min_inv_per_frag >= 1);
>>
>> -   if (brw->wm.prog_data->prog_offset_16) {
>> +   if (brw->wm.prog_data->prog_offset_16 || brw->wm.prog_data->no_8) {
>>        dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
>> -      if (min_inv_per_frag == 1) {
>> +      if (!brw->wm.prog_data->no_8 && min_inv_per_frag == 1) {
>>           dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
>>           dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
>>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
>> diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c
> b/src/mesa/drivers/dri/i965/gen8_ps_state.c
>> index f58d49c..49d4fe0 100644
>> --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
>> @@ -195,9 +195,9 @@ upload_ps_state(struct brw_context *brw)
>>        _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program,
> false);
>>     assert(min_invocations_per_fragment >= 1);
>>
>> -   if (brw->wm.prog_data->prog_offset_16) {
>> +   if (brw->wm.prog_data->prog_offset_16 || brw->wm.prog_data->no_8) {
>>        dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
>> -      if (min_invocations_per_fragment == 1) {
>> +      if (!brw->wm.prog_data->no_8 && min_invocations_per_fragment == 1) {
>>           dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
>>           dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
>>                   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
>> diff --git a/src/mesa/drivers/dri/i965/intel_debug.c
> b/src/mesa/drivers/dri/i965/intel_debug.c
>> index c72fce2..1fb8b6c 100644
>> --- a/src/mesa/drivers/dri/i965/intel_debug.c
>> +++ b/src/mesa/drivers/dri/i965/intel_debug.c
>> @@ -66,6 +66,7 @@ static const struct dri_debug_control debug_control[] = {
>>     { "nodualobj", DEBUG_NO_DUAL_OBJECT_GS },
>>     { "optimizer", DEBUG_OPTIMIZER },
>>     { "noann", DEBUG_NO_ANNOTATION },
>> +   { "no8",  DEBUG_NO8 },
>>     { NULL,    0 }
>>  };
>>
>> diff --git a/src/mesa/drivers/dri/i965/intel_debug.h
> b/src/mesa/drivers/dri/i965/intel_debug.h
>> index 37dc34a..8e1c299 100644
>> --- a/src/mesa/drivers/dri/i965/intel_debug.h
>> +++ b/src/mesa/drivers/dri/i965/intel_debug.h
>> @@ -62,6 +62,7 @@ extern uint64_t INTEL_DEBUG;
>>  #define DEBUG_NO_DUAL_OBJECT_GS 0x80000000
>>  #define DEBUG_OPTIMIZER   0x100000000
>>  #define DEBUG_NO_ANNOTATION 0x200000000
>> +#define DEBUG_NO8        0x40000000
>>
>>  #ifdef HAVE_ANDROID_PLATFORM
>>  #define LOG_TAG "INTEL-MESA"
>>
>
> I like how you made the flags control whether we compile a SIMD8 shader, and
> then made the state upload code just upload whatever programs are there.
>
> It seems like the state upload code could get refactored a little bit:
> - Figure out which programs exist (8/16/32...?); assign them to KSP 0/1/2
> - Upload the offsets/starting registers for KSP 0/1/2
> Might be a nice follow-up patch anyway.  In fact, I think you wrote something
> along those lines - maybe it was in your branch?

I did write something like that:

http://cgit.freedesktop.org/~krh/mesa/commit/?h=fast-clear&id=0a96bf00d3c273e9b2b5f21c296347c9cbe5f0df

I sent out the no8 patch on its own, since we were talking about how
it would be useful.  I also rote a patch to make the logic around
picking ksp0/1/2 and the GRF offsets a little easier to read, but
decided on moving ksp assignment into the decision tree for setting
the enable bits and GRF offsets instead.

> I'd also like to see a patch to allow register spilling in SIMD16 mode if
> INTEL_DEBUG=no8.  Everything should be in place, you just need to whack
> brw_fs.cpp's dispatch_width == 16 check in the !allocated_without_spills
> block.
>
> But I think those could be done in other patches.  I'm fairly ambivalent to
> whether you add brw->no_simd8 here or in another patch.  Perhaps make it brw-
>>wm.no_simd8, though?

Right, that makes sense.  Actually, I've been wondering if the
no_simd8 flag should be part of the program key instead though.  Right
now, I rely on the pre-compile guessing the right key and compiling
and uploading the program while I have the no_simd8 flag set.  That
doesn't seem optimal

> With or without those suggestions,
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

thanks,
Kristian


More information about the mesa-dev mailing list