[Mesa-dev] [PATCH v02 28/37] i965: Port gen6+ 3DSTATE_VS to genxml.

Kenneth Graunke kenneth at whitecape.org
Thu Apr 27 06:44:00 UTC 2017


On Monday, April 24, 2017 3:19:23 PM PDT Rafael Antognolli wrote:
[snip]
> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> index 17b8118..b2d2306 100644
> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> @@ -68,116 +68,3 @@ const struct brw_tracked_state gen6_vs_push_constants = {
>     },
>     .emit = gen6_upload_vs_push_constants,
>  };
> -
> -static void
> -upload_vs_state(struct brw_context *brw)
> -{
> -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -   const struct brw_stage_state *stage_state = &brw->vs.base;
> -   const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
> -   const struct brw_vue_prog_data *vue_prog_data =
> -      brw_vue_prog_data(stage_state->prog_data);
> -   uint32_t floating_point_mode = 0;
> -
> -   /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
> -    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
> -    *
> -    *   [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
> -    *   command that causes the VS Function Enable to toggle. Pipeline
> -    *   flush can be executed by sending a PIPE_CONTROL command with CS
> -    *   stall bit set and a post sync operation.
> -    *
> -    * We've already done such a flush at the start of state upload, so we
> -    * don't need to do another one here.
> -    */

Please don't lose this comment.  Otherwise it looks like we've merely
forgotten to do the flush, and it's not missing intentionally.

> -
> -   if (stage_state->push_const_size == 0) {
> -      /* Disable the push constant buffers. */
> -      BEGIN_BATCH(5);
> -      OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
> -      OUT_BATCH(0);
> -      OUT_BATCH(0);
> -      OUT_BATCH(0);
> -      OUT_BATCH(0);
> -      ADVANCE_BATCH();
> -   } else {
> -      BEGIN_BATCH(5);
> -      OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
> -		GEN6_CONSTANT_BUFFER_0_ENABLE |
> -		(5 - 2));
> -      /* Pointer to the VS constant buffer.  Covered by the set of
> -       * state flags from gen6_upload_vs_constants
> -       */
> -      OUT_BATCH(stage_state->push_const_offset +
> -		stage_state->push_const_size - 1);
> -      OUT_BATCH(0);
> -      OUT_BATCH(0);
> -      OUT_BATCH(0);
> -      ADVANCE_BATCH();
> -   }
> -
> -   if (prog_data->use_alt_mode)
> -      floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT;
> -
> -   BEGIN_BATCH(6);
> -   OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
> -   OUT_BATCH(stage_state->prog_offset);
> -   OUT_BATCH(floating_point_mode |
> -	     ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) |
> -             ((prog_data->binding_table.size_bytes / 4) <<
> -              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> -
> -   if (prog_data->total_scratch) {
> -      OUT_RELOC(stage_state->scratch_bo,
> -		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> -		ffs(stage_state->per_thread_scratch) - 11);
> -   } else {
> -      OUT_BATCH(0);
> -   }
> -
> -   OUT_BATCH((prog_data->dispatch_grf_start_reg <<
> -              GEN6_VS_DISPATCH_START_GRF_SHIFT) |
> -	     (vue_prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
> -	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
> -
> -   OUT_BATCH(((devinfo->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
> -	     GEN6_VS_STATISTICS_ENABLE |
> -	     GEN6_VS_ENABLE);
> -   ADVANCE_BATCH();
> -
> -   /* Based on my reading of the simulator, the VS constants don't get
> -    * pulled into the VS FF unit until an appropriate pipeline flush
> -    * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
> -    * references to them into a little FIFO.  The flushes are common,
> -    * but don't reliably happen between this and a 3DPRIMITIVE, causing
> -    * the primitive to use the wrong constants.  Then the FIFO
> -    * containing the constant setup gets added to again on the next
> -    * constants change, and eventually when a flush does happen the
> -    * unit is overwhelmed by constant changes and dies.
> -    *
> -    * To avoid this, send a PIPE_CONTROL down the line that will
> -    * update the unit immediately loading the constants.  The flush
> -    * type bits here were those set by the STATE_BASE_ADDRESS whose
> -    * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
> -    * bug reports that led to this workaround, and may be more than
> -    * what is strictly required to avoid the issue.
> -    */

Please don't lose this comment.  The reason for the flush is not at all
obvious, and without the explanation, someone might try and remove it.

> -   brw_emit_pipe_control_flush(brw,
> -                               PIPE_CONTROL_DEPTH_STALL |
> -                               PIPE_CONTROL_INSTRUCTION_INVALIDATE |
> -                               PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> -}
> -
> -const struct brw_tracked_state gen6_vs_state = {
> -   .dirty = {
> -      .mesa  = _NEW_PROGRAM_CONSTANTS |
> -               _NEW_TRANSFORM,
> -      .brw   = BRW_NEW_BATCH |
> -               BRW_NEW_BLORP |
> -               BRW_NEW_CONTEXT |
> -               BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> -               BRW_NEW_VERTEX_PROGRAM |
> -               BRW_NEW_VS_PROG_DATA,
> -   },
> -   .emit = upload_vs_state,
> -};
[snip]
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 7ed79b2..d1609f6 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -27,6 +27,9 @@
>  #include "genxml/gen_macros.h"
>  
>  #include "brw_context.h"
> +#if GEN_GEN == 6
> +#include "brw_defines.h"
> +#endif
>  #include "brw_state.h"
>  #include "brw_wm.h"
>  #include "brw_util.h"
> @@ -974,6 +977,103 @@ static const struct brw_tracked_state genX(wm_state) = {
>     .emit = genX(upload_wm),
>  };
>  
> +/* ---------------------------------------------------------------------- */
> +
> +#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
> +   pkt.KernelStartPointer = stage_state->prog_offset;                     \
> +   pkt.SamplerCount       =                                               \
> +      DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);          \
> +   pkt.BindingTableEntryCount =                                           \
> +      stage_prog_data->binding_table.size_bytes / 4;                      \
> +   pkt.FloatingPointMode  = stage_prog_data->use_alt_mode;                \
> +                                                                          \
> +   if (stage_prog_data->total_scratch) {                                  \
> +      pkt.ScratchSpaceBasePointer =                                       \
> +         render_bo(stage_state->scratch_bo, 0);                           \
> +      pkt.PerThreadScratchSpace =                                         \
> +         ffs(stage_state->per_thread_scratch) - 11;                       \
> +   }                                                                      \
> +                                                                          \
> +   pkt.DispatchGRFStartRegisterForURBData =                               \
> +      stage_prog_data->dispatch_grf_start_reg;                            \
> +   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;       \
> +   pkt.prefix##URBEntryReadOffset = 0;                                    \
> +                                                                          \
> +   pkt.StatisticsEnable = true;                                           \
> +   pkt.Enable           = true;
> +
> +
> +static void
> +genX(upload_vs_state)(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   const struct brw_stage_state *stage_state = &brw->vs.base;
> +
> +   /* BRW_NEW_VS_PROG_DATA */
> +   const struct brw_vue_prog_data *vue_prog_data =
> +      brw_vue_prog_data(brw->vs.base.prog_data);
> +   const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base;
> +
> +#if GEN_GEN >= 8
> +   /* _NEW_TRANSFORM */
> +   struct gl_context *ctx = &brw->ctx;
> +   const struct gl_transform_attrib *transform = &ctx->Transform;
> +#endif
> +
> +   assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ||
> +          vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT);
> +
> +#if GEN_GEN < 7
> +   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) {

lol...cvs :)

> +      if (stage_state->push_const_size != 0) {
> +         cvs.Buffer0Valid = true;
> +         cvs.PointertoVSConstantBuffer0 = stage_state->push_const_offset;
> +         cvs.VSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
> +      }
> +   }
> +#endif
> +
> +   if (devinfo->is_ivybridge)

Doing

   if (GEN_GEN == 7 && devinfo->is_ivybridge)

looks a bit silly but would make this compile away on other generations.

> +      gen7_emit_vs_workaround_flush(brw);
> +
> +   brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
> +      INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
> +
> +      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
> +
> +#if GEN_GEN >= 8
> +      vs.SIMD8DispatchEnable =
> +         vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
> +
> +      vs.UserClipDistanceClipTestEnableBitmask = transform->ClipPlanesEnabled;
> +      vs.UserClipDistanceCullTestEnableBitmask =
> +         vue_prog_data->cull_distance_mask;

We should drop the vs.UserClipDistance stuff - I removed it in commit
903056e016e3ea52c2f493f8b0938b519ee40894.

> +#endif
> +   }
> +
> +#if GEN_GEN < 7
> +   brw_emit_pipe_control_flush(brw,
> +                               PIPE_CONTROL_DEPTH_STALL |
> +                               PIPE_CONTROL_INSTRUCTION_INVALIDATE |
> +                               PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> +#endif
> +}
> +
> +static const struct brw_tracked_state genX(vs_state) = {
> +   .dirty = {
> +      .mesa  = _NEW_TRANSFORM |
> +               (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),

_NEW_TRANSFORM should only be on Gen6 - .mesa should be 0 on Gen7+.

With those changes,
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

> +      .brw   = BRW_NEW_BATCH |
> +               BRW_NEW_BLORP |
> +               BRW_NEW_CONTEXT |
> +               BRW_NEW_VS_PROG_DATA |
> +               (GEN_GEN < 7 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> +                              BRW_NEW_VERTEX_PROGRAM
> +                            : 0),
> +   },
> +   .emit = genX(upload_vs_state),
> +};
> +
>  #endif
>  
>  /* ---------------------------------------------------------------------- */
> @@ -1868,7 +1968,7 @@ genX(init_atoms)(struct brw_context *brw)
>        &gen6_sampler_state,
>        &gen6_multisample_state,
>  
> -      &gen6_vs_state,
> +      &genX(vs_state),
>        &gen6_gs_state,
>        &genX(clip_state),
>        &genX(sf_state),
> @@ -1952,7 +2052,7 @@ genX(init_atoms)(struct brw_context *brw)
>        &brw_gs_samplers,
>        &gen6_multisample_state,
>  
> -      &gen7_vs_state,
> +      &genX(vs_state),
>        &gen7_hs_state,
>        &gen7_te_state,
>        &gen7_ds_state,
> @@ -2039,7 +2139,7 @@ genX(init_atoms)(struct brw_context *brw)
>        &brw_gs_samplers,
>        &gen8_multisample_state,
>  
> -      &gen8_vs_state,
> +      &genX(vs_state),
>        &gen8_hs_state,
>        &gen7_te_state,
>        &gen8_ds_state,
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170426/ec422e05/attachment-0001.sig>


More information about the mesa-dev mailing list