[Mesa-dev] [PATCH v02 28/37] i965: Port gen6+ 3DSTATE_VS to genxml.
Kenneth Graunke
kenneth at whitecape.org
Thu Apr 27 06:44:00 UTC 2017
On Monday, April 24, 2017 3:19:23 PM PDT Rafael Antognolli wrote:
[snip]
> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> index 17b8118..b2d2306 100644
> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
> @@ -68,116 +68,3 @@ const struct brw_tracked_state gen6_vs_push_constants = {
> },
> .emit = gen6_upload_vs_push_constants,
> };
> -
> -static void
> -upload_vs_state(struct brw_context *brw)
> -{
> - const struct gen_device_info *devinfo = &brw->screen->devinfo;
> - const struct brw_stage_state *stage_state = &brw->vs.base;
> - const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
> - const struct brw_vue_prog_data *vue_prog_data =
> - brw_vue_prog_data(stage_state->prog_data);
> - uint32_t floating_point_mode = 0;
> -
> - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
> - * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
> - *
> - * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
> - * command that causes the VS Function Enable to toggle. Pipeline
> - * flush can be executed by sending a PIPE_CONTROL command with CS
> - * stall bit set and a post sync operation.
> - *
> - * We've already done such a flush at the start of state upload, so we
> - * don't need to do another one here.
> - */
Please don't lose this comment. Otherwise it looks like we've merely
forgotten to do the flush, and it's not missing intentionally.
> -
> - if (stage_state->push_const_size == 0) {
> - /* Disable the push constant buffers. */
> - BEGIN_BATCH(5);
> - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - ADVANCE_BATCH();
> - } else {
> - BEGIN_BATCH(5);
> - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
> - GEN6_CONSTANT_BUFFER_0_ENABLE |
> - (5 - 2));
> - /* Pointer to the VS constant buffer. Covered by the set of
> - * state flags from gen6_upload_vs_constants
> - */
> - OUT_BATCH(stage_state->push_const_offset +
> - stage_state->push_const_size - 1);
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - ADVANCE_BATCH();
> - }
> -
> - if (prog_data->use_alt_mode)
> - floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT;
> -
> - BEGIN_BATCH(6);
> - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
> - OUT_BATCH(stage_state->prog_offset);
> - OUT_BATCH(floating_point_mode |
> - ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) |
> - ((prog_data->binding_table.size_bytes / 4) <<
> - GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> -
> - if (prog_data->total_scratch) {
> - OUT_RELOC(stage_state->scratch_bo,
> - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> - ffs(stage_state->per_thread_scratch) - 11);
> - } else {
> - OUT_BATCH(0);
> - }
> -
> - OUT_BATCH((prog_data->dispatch_grf_start_reg <<
> - GEN6_VS_DISPATCH_START_GRF_SHIFT) |
> - (vue_prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
> - (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
> -
> - OUT_BATCH(((devinfo->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
> - GEN6_VS_STATISTICS_ENABLE |
> - GEN6_VS_ENABLE);
> - ADVANCE_BATCH();
> -
> - /* Based on my reading of the simulator, the VS constants don't get
> - * pulled into the VS FF unit until an appropriate pipeline flush
> - * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
> - * references to them into a little FIFO. The flushes are common,
> - * but don't reliably happen between this and a 3DPRIMITIVE, causing
> - * the primitive to use the wrong constants. Then the FIFO
> - * containing the constant setup gets added to again on the next
> - * constants change, and eventually when a flush does happen the
> - * unit is overwhelmed by constant changes and dies.
> - *
> - * To avoid this, send a PIPE_CONTROL down the line that will
> - * update the unit immediately loading the constants. The flush
> - * type bits here were those set by the STATE_BASE_ADDRESS whose
> - * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
> - * bug reports that led to this workaround, and may be more than
> - * what is strictly required to avoid the issue.
> - */
Please don't lose this comment. The reason for the flush is not at all
obvious, and without the explanation, someone might try and remove it.
> - brw_emit_pipe_control_flush(brw,
> - PIPE_CONTROL_DEPTH_STALL |
> - PIPE_CONTROL_INSTRUCTION_INVALIDATE |
> - PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> -}
> -
> -const struct brw_tracked_state gen6_vs_state = {
> - .dirty = {
> - .mesa = _NEW_PROGRAM_CONSTANTS |
> - _NEW_TRANSFORM,
> - .brw = BRW_NEW_BATCH |
> - BRW_NEW_BLORP |
> - BRW_NEW_CONTEXT |
> - BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> - BRW_NEW_VERTEX_PROGRAM |
> - BRW_NEW_VS_PROG_DATA,
> - },
> - .emit = upload_vs_state,
> -};
[snip]
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 7ed79b2..d1609f6 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -27,6 +27,9 @@
> #include "genxml/gen_macros.h"
>
> #include "brw_context.h"
> +#if GEN_GEN == 6
> +#include "brw_defines.h"
> +#endif
> #include "brw_state.h"
> #include "brw_wm.h"
> #include "brw_util.h"
> @@ -974,6 +977,103 @@ static const struct brw_tracked_state genX(wm_state) = {
> .emit = genX(upload_wm),
> };
>
> +/* ---------------------------------------------------------------------- */
> +
> +#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
> + pkt.KernelStartPointer = stage_state->prog_offset; \
> + pkt.SamplerCount = \
> + DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
> + pkt.BindingTableEntryCount = \
> + stage_prog_data->binding_table.size_bytes / 4; \
> + pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \
> + \
> + if (stage_prog_data->total_scratch) { \
> + pkt.ScratchSpaceBasePointer = \
> + render_bo(stage_state->scratch_bo, 0); \
> + pkt.PerThreadScratchSpace = \
> + ffs(stage_state->per_thread_scratch) - 11; \
> + } \
> + \
> + pkt.DispatchGRFStartRegisterForURBData = \
> + stage_prog_data->dispatch_grf_start_reg; \
> + pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \
> + pkt.prefix##URBEntryReadOffset = 0; \
> + \
> + pkt.StatisticsEnable = true; \
> + pkt.Enable = true;
> +
> +
> +static void
> +genX(upload_vs_state)(struct brw_context *brw)
> +{
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> + const struct brw_stage_state *stage_state = &brw->vs.base;
> +
> + /* BRW_NEW_VS_PROG_DATA */
> + const struct brw_vue_prog_data *vue_prog_data =
> + brw_vue_prog_data(brw->vs.base.prog_data);
> + const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base;
> +
> +#if GEN_GEN >= 8
> + /* _NEW_TRANSFORM */
> + struct gl_context *ctx = &brw->ctx;
> + const struct gl_transform_attrib *transform = &ctx->Transform;
> +#endif
> +
> + assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ||
> + vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT);
> +
> +#if GEN_GEN < 7
> + brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) {
lol...cvs :)
> + if (stage_state->push_const_size != 0) {
> + cvs.Buffer0Valid = true;
> + cvs.PointertoVSConstantBuffer0 = stage_state->push_const_offset;
> + cvs.VSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
> + }
> + }
> +#endif
> +
> + if (devinfo->is_ivybridge)
Doing
if (GEN_GEN == 7 && devinfo->is_ivybridge)
looks a bit silly but would make this compile away on other generations.
> + gen7_emit_vs_workaround_flush(brw);
> +
> + brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
> + INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
> +
> + vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
> +
> +#if GEN_GEN >= 8
> + vs.SIMD8DispatchEnable =
> + vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
> +
> + vs.UserClipDistanceClipTestEnableBitmask = transform->ClipPlanesEnabled;
> + vs.UserClipDistanceCullTestEnableBitmask =
> + vue_prog_data->cull_distance_mask;
We should drop the vs.UserClipDistance stuff - I removed it in commit
903056e016e3ea52c2f493f8b0938b519ee40894.
> +#endif
> + }
> +
> +#if GEN_GEN < 7
> + brw_emit_pipe_control_flush(brw,
> + PIPE_CONTROL_DEPTH_STALL |
> + PIPE_CONTROL_INSTRUCTION_INVALIDATE |
> + PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> +#endif
> +}
> +
> +static const struct brw_tracked_state genX(vs_state) = {
> + .dirty = {
> + .mesa = _NEW_TRANSFORM |
> + (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
_NEW_TRANSFORM should only be on Gen6 - .mesa should be 0 on Gen7+.
With those changes,
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
> + .brw = BRW_NEW_BATCH |
> + BRW_NEW_BLORP |
> + BRW_NEW_CONTEXT |
> + BRW_NEW_VS_PROG_DATA |
> + (GEN_GEN < 7 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> + BRW_NEW_VERTEX_PROGRAM
> + : 0),
> + },
> + .emit = genX(upload_vs_state),
> +};
> +
> #endif
>
> /* ---------------------------------------------------------------------- */
> @@ -1868,7 +1968,7 @@ genX(init_atoms)(struct brw_context *brw)
> &gen6_sampler_state,
> &gen6_multisample_state,
>
> - &gen6_vs_state,
> + &genX(vs_state),
> &gen6_gs_state,
> &genX(clip_state),
> &genX(sf_state),
> @@ -1952,7 +2052,7 @@ genX(init_atoms)(struct brw_context *brw)
> &brw_gs_samplers,
> &gen6_multisample_state,
>
> - &gen7_vs_state,
> + &genX(vs_state),
> &gen7_hs_state,
> &gen7_te_state,
> &gen7_ds_state,
> @@ -2039,7 +2139,7 @@ genX(init_atoms)(struct brw_context *brw)
> &brw_gs_samplers,
> &gen8_multisample_state,
>
> - &gen8_vs_state,
> + &genX(vs_state),
> &gen8_hs_state,
> &gen7_te_state,
> &gen8_ds_state,
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170426/ec422e05/attachment-0001.sig>
More information about the mesa-dev
mailing list