[Mesa-dev] [PATCH 21/22] i965/gen7: Generalize gen7_vs_state in preparation for GS.

Thu Aug 29 16:06:29 PDT 2013

Kenneth Graunke <kenneth at whitecape.org> writes:

> On 08/26/2013 03:12 PM, Paul Berry wrote:
>> ---
>>   src/mesa/drivers/dri/i965/brw_state.h     |  41 ++++++++++
>>   src/mesa/drivers/dri/i965/gen7_vs_state.c | 123 ++++++++++++++++++++----------
>>   2 files changed, 122 insertions(+), 42 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
>> index b54338a..efef994 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>> @@ -128,6 +128,38 @@ extern const struct brw_tracked_state gen7_wm_state;
>>   extern const struct brw_tracked_state haswell_cut_index;
>>
>>
>> +/**
>> + * Parameters that differ between Gen7 VS and GS state upload commands.
>> + */
>> +struct gen7_vec4_upload_params
>> +{
>> +   /**
>> +    * Command used to set the binding table pointers for this stage.
>> +    */
>> +   unsigned binding_table_pointers_cmd;
>> +
>> +   /**
>> +    * Command used to set the sampler state pointers for this stage.
>> +    */
>> +   unsigned sampler_state_pointers_cmd;
>> +
>> +   /**
>> +    * Command used to send constants for this stage.
>> +    */
>> +   unsigned constant_cmd;
>> +
>> +   /**
>> +    * Command used to send state for this stage.
>> +    */
>> +   unsigned state_cmd;
>> +
>> +   /**
>> +    * Size of the state command for this stage.
>> +    */
>> +   unsigned state_cmd_size;
>> +};
>> +
>> +
>>   /* brw_misc_state.c */
>>   void brw_upload_invariant_state(struct brw_context *brw);
>>   uint32_t
>> @@ -240,6 +272,15 @@ brw_vec4_upload_binding_table(struct brw_context *brw,
>>                                 struct brw_vec4_context_base *vec4_ctx,
>>                                 const struct brw_vec4_prog_data *prog_data);
>>
>> +/* gen7_vs_state.c */
>> +void
>> +gen7_upload_vec4_state(struct brw_context *brw,
>> +                       const struct gen7_vec4_upload_params *upload_params,
>> +                       const struct brw_vec4_context_base *vec4_ctx,
>> +                       bool active, bool alt_floating_point_mode,
>> +                       const struct brw_vec4_prog_data *prog_data,
>> +                       const unsigned *stage_specific_cmd_data);
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> index 30fe802..fd81112 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> @@ -29,33 +29,31 @@
>>   #include "program/prog_statevars.h"
>>   #include "intel_batchbuffer.h"
>>
>> -static void
>> -upload_vs_state(struct brw_context *brw)
>> -{
>> -   struct gl_context *ctx = &brw->ctx;
>> -   const struct brw_vec4_context_base *vec4_ctx = &brw->vs.base;
>> -   uint32_t floating_point_mode = 0;
>> -   const int max_threads_shift = brw->is_haswell ?
>> -      HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
>>
>> -   gen7_emit_vs_workaround_flush(brw);
>> -
>> -   /* BRW_NEW_VS_BINDING_TABLE */
>> +void
>> +gen7_upload_vec4_state(struct brw_context *brw,
>> +                       const struct gen7_vec4_upload_params *upload_params,
>> +                       const struct brw_vec4_context_base *vec4_ctx,
>> +                       bool active, bool alt_floating_point_mode,
>> +                       const struct brw_vec4_prog_data *prog_data,
>> +                       const unsigned *stage_specific_cmd_data)
>> +{
>> +   /* BRW_NEW_*_BINDING_TABLE */
>>      BEGIN_BATCH(2);
>> -   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
>> +   OUT_BATCH(upload_params->binding_table_pointers_cmd << 16 | (2 - 2));
>>      OUT_BATCH(vec4_ctx->bind_bo_offset);
>>      ADVANCE_BATCH();
>>
>>      /* CACHE_NEW_SAMPLER */
>>      BEGIN_BATCH(2);
>> -   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
>> +   OUT_BATCH(upload_params->sampler_state_pointers_cmd << 16 | (2 - 2));
>>      OUT_BATCH(vec4_ctx->sampler_offset);
>>      ADVANCE_BATCH();
>>
>> -   if (vec4_ctx->push_const_size == 0) {
>> +   if (!active || vec4_ctx->push_const_size == 0) {
>>         /* Disable the push constant buffers. */
>>         BEGIN_BATCH(7);
>> -      OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
>> +      OUT_BATCH(upload_params->constant_cmd << 16 | (7 - 2));
>>         OUT_BATCH(0);
>>         OUT_BATCH(0);
>>         OUT_BATCH(0);
>> @@ -65,10 +63,10 @@ upload_vs_state(struct brw_context *brw)
>>         ADVANCE_BATCH();
>>      } else {
>>         BEGIN_BATCH(7);
>> -      OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
>> +      OUT_BATCH(upload_params->constant_cmd << 16 | (7 - 2));
>>         OUT_BATCH(vec4_ctx->push_const_size);
>>         OUT_BATCH(0);
>> -      /* Pointer to the VS constant buffer.  Covered by the set of
>> +      /* Pointer to the stage's constant buffer.  Covered by the set of
>>          * state flags from gen6_prepare_wm_contants
>>          */
>>         OUT_BATCH(vec4_ctx->push_const_offset | GEN7_MOCS_L3);
>> @@ -78,36 +76,77 @@ upload_vs_state(struct brw_context *brw)
>>         ADVANCE_BATCH();
>>      }
>>
>> +   BEGIN_BATCH(upload_params->state_cmd_size);
>> +   OUT_BATCH(upload_params->state_cmd << 16 |
>> +             (upload_params->state_cmd_size - 2));
>> +   if (active) {
>> +      OUT_BATCH(vec4_ctx->prog_offset);
>> +      OUT_BATCH((alt_floating_point_mode ? GEN6_FLOATING_POINT_MODE_ALT
>> +                 : GEN6_FLOATING_POINT_MODE_IEEE_754) |
>> +                ((ALIGN(vec4_ctx->sampler_count, 4)/4) <<
>> +                 GEN6_SAMPLER_COUNT_SHIFT));
>> +
>> +      if (prog_data->total_scratch) {
>> +         OUT_RELOC(vec4_ctx->scratch_bo,
>> +                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
>> +                   ffs(prog_data->total_scratch) - 11);
>> +      } else {
>> +         OUT_BATCH(0);
>> +      }
>> +   } else {
>> +      OUT_BATCH(0); /* prog_bo */
>> +      OUT_BATCH((0 << GEN6_SAMPLER_COUNT_SHIFT) |
>> +                (0 << GEN6_BINDING_TABLE_ENTRY_COUNT_SHIFT));
>> +      OUT_BATCH(0); /* scratch space base offset */
>> +   }
>> +   for (int i = 0; i < upload_params->state_cmd_size - 4; ++i)
>> +      OUT_BATCH(stage_specific_cmd_data[i]);
>> +   ADVANCE_BATCH();
>> +}
>> +
>> +
>> +static const struct gen7_vec4_upload_params vs_upload_params = {
>> +   .binding_table_pointers_cmd = _3DSTATE_BINDING_TABLE_POINTERS_VS,
>> +   .sampler_state_pointers_cmd = _3DSTATE_SAMPLER_STATE_POINTERS_VS,
>> +   .constant_cmd = _3DSTATE_CONSTANT_VS,
>> +   .state_cmd = _3DSTATE_VS,
>> +   .state_cmd_size = 6,
>> +};
>> +
>> +
>> +static void
>> +upload_vs_state(struct brw_context *brw)
>> +{
>> +   struct gl_context *ctx = &brw->ctx;
>> +   const struct brw_vec4_context_base *vec4_ctx = &brw->vs.base;
>> +   const int max_threads_shift = brw->is_haswell ?
>> +      HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
>> +   /* CACHE_NEW_VS_PROG */
>> +   const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;
>> +
>> +   gen7_emit_vs_workaround_flush(brw);
>> +
>>      /* Use ALT floating point mode for ARB vertex programs, because they
>>       * require 0^0 == 1.
>>       */
>> -   if (ctx->Shader.CurrentVertexProgram == NULL)
>> -      floating_point_mode = GEN6_FLOATING_POINT_MODE_ALT;
>> -
>> -   BEGIN_BATCH(6);
>> -   OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
>> -   OUT_BATCH(vec4_ctx->prog_offset);
>> -   OUT_BATCH(floating_point_mode |
>> -	     ((ALIGN(vec4_ctx->sampler_count, 4)/4) <<
>> -              GEN6_SAMPLER_COUNT_SHIFT));
>> -
>> -   if (brw->vs.prog_data->base.total_scratch) {
>> -      OUT_RELOC(vec4_ctx->scratch_bo,
>> -		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
>> -		ffs(brw->vs.prog_data->base.total_scratch) - 11);
>> -   } else {
>> -      OUT_BATCH(0);
>> -   }
>> +   bool alt_floating_point_mode = (ctx->Shader.CurrentVertexProgram == NULL);
>>
>> -   OUT_BATCH((brw->vs.prog_data->base.dispatch_grf_start_reg <<
>> -              GEN6_VS_DISPATCH_START_GRF_SHIFT) |
>> -	     (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
>> -	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
>> +   unsigned stage_specific_cmd_data[2];
>> +   stage_specific_cmd_data[0] =
>> +      (prog_data->dispatch_grf_start_reg <<
>> +       GEN6_VS_DISPATCH_START_GRF_SHIFT) |
>> +      (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
>> +      (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT);
>> +   stage_specific_cmd_data[1] =
>> +      ((brw->max_vs_threads - 1) << max_threads_shift) |
>> +      GEN6_VS_STATISTICS_ENABLE |
>> +      GEN6_VS_ENABLE;
>>
>> -   OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
>> -	     GEN6_VS_STATISTICS_ENABLE |
>> -	     GEN6_VS_ENABLE);
>> -   ADVANCE_BATCH();
>> +   /* BRW_NEW_VS_BINDING_TABLE */
>> +   /* CACHE_NEW_SAMPLER */
>> +   gen7_upload_vec4_state(brw, &vs_upload_params, vec4_ctx, true /* active */,
>> +                          alt_floating_point_mode, prog_data,
>> +                          stage_specific_cmd_data);
>>   }
>>
>>   const struct brw_tracked_state gen7_vs_state = {
>>
>
> I can't quite put my finger on why, but I really dislike this code 
> reuse.  It just seems really awkward to me.

The struct argument is what makes me cringe.  If upload_vs_state() was:

static void
upload_vs_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t floating_point_mode = 0;
   const int max_threads_shift = brw->is_haswell ?
      HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;

   /* the VS has a special requirement for synchronization due to a bug
    * in its constant handling.
    */
   gen7_emit_vs_workaround_flush(brw);

   emit_binding_table_pointers(_3DSTATE_BINDING_TABLE_POINTERS_VS, brw->vs.bind_bo_offset);
   emit_sampler(_3DSTATE_SAMPLER_STATE_POINTERS_VS, brw->vs.sampler_offset);
   emit_constants(3DSTATE_CONSTANT_VS, &brw->vs.stage_state);

   /* inline emit of 3DSTATE_VS just like we have today, since there's a
    * whole bunch of stage-specific stuff as is evident from the
    * alt_floating_point_mode, state_cmd, state_cmd_size,
    * stage_specific_cmd_data arguments.
    */
}

that would share the definitely-common code without the contortions to
make 3DSTATE_VS be "shared".  Plus you get to fairly trivially reuse
those 3 shared packets from blorp, which this patch didn't do today.
(only "fairly" trivially because you'd want to have individual
enable/disable packets for the constants to really trivially reuse from
blorp).
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 835 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20130829/4c38de92/attachment-0001.pgp>