[Mesa-dev] [PATCH v02 29/37] i965: Port gen6+ state emitting code to genxml.

Kenneth Graunke kenneth at whitecape.org
Thu Apr 27 07:35:40 UTC 2017


On Monday, April 24, 2017 3:19:24 PM PDT Rafael Antognolli wrote:
[snip]
> diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
> deleted file mode 100644
> index 1b5b782..0000000
> --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
> +++ /dev/null
> @@ -1,168 +0,0 @@
> -/*
> - * Copyright © 2013 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice (including the next
> - * paragraph) shall be included in all copies or substantial portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> - * DEALINGS IN THE SOFTWARE.
> - */
> -
> -#include "brw_context.h"
> -#include "brw_state.h"
> -#include "brw_defines.h"
> -#include "intel_batchbuffer.h"
> -
> -static void
> -upload_gs_state(struct brw_context *brw)
> -{
> -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -   const struct brw_stage_state *stage_state = &brw->gs.base;
> -   const int max_threads_shift = brw->is_haswell ?
> -      HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT;
> -   /* BRW_NEW_GEOMETRY_PROGRAM */
> -   bool active = brw->geometry_program;
> -   /* BRW_NEW_GS_PROG_DATA */
> -   const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
> -   const struct brw_vue_prog_data *vue_prog_data =
> -      brw_vue_prog_data(stage_state->prog_data);
> -   const struct brw_gs_prog_data *gs_prog_data =
> -      brw_gs_prog_data(stage_state->prog_data);
> -
> -   /**
> -    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
> -    * Geometry > Geometry Shader > State:
> -    *
> -    *     "Note: Because of corruption in IVB:GT2, software needs to flush the
> -    *     whole fixed function pipeline when the GS enable changes value in
> -    *     the 3DSTATE_GS."
> -    *
> -    * The hardware architects have clarified that in this context "flush the
> -    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
> -    * Stall" bit set.
> -    */
> -   if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active)
> -      gen7_emit_cs_stall_flush(brw);

You're missing this flush and comment.  Please add it back.

> -
> -   if (active) {
> -      BEGIN_BATCH(7);
> -      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> -      OUT_BATCH(stage_state->prog_offset);
> -      OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
> -                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
> -                ((prog_data->binding_table.size_bytes / 4) <<
> -                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> -
> -      if (prog_data->total_scratch) {
> -         OUT_RELOC(stage_state->scratch_bo,
> -                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> -                   ffs(stage_state->per_thread_scratch) - 11);
> -      } else {
> -         OUT_BATCH(0);
> -      }
> -
> -      uint32_t dw4 =
> -         ((gs_prog_data->output_vertex_size_hwords * 2 - 1) <<
> -          GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
> -         (gs_prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
> -         (vue_prog_data->urb_read_length <<
> -          GEN6_GS_URB_READ_LENGTH_SHIFT) |
> -         (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
> -         (prog_data->dispatch_grf_start_reg <<
> -          GEN6_GS_DISPATCH_START_GRF_SHIFT);
> -
> -      /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between
> -       * Ivy Bridge and Haswell.
> -       *
> -       * On Ivy Bridge, setting this bit causes the vertices of a triangle
> -       * strip to be delivered to the geometry shader in an order that does
> -       * not strictly follow the OpenGL spec, but preserves triangle
> -       * orientation.  For example, if the vertices are (1, 2, 3, 4, 5), then
> -       * the geometry shader sees triangles:
> -       *
> -       * (1, 2, 3), (2, 4, 3), (3, 4, 5)
> -       *
> -       * (Clearing the bit is even worse, because it fails to preserve
> -       * orientation).
> -       *
> -       * Triangle strips with adjacency always ordered in a way that preserves
> -       * triangle orientation but does not strictly follow the OpenGL spec,
> -       * regardless of the setting of this bit.
> -       *
> -       * On Haswell, both triangle strips and triangle strips with adjacency
> -       * are always ordered in a way that preserves triangle orientation.
> -       * Setting this bit causes the ordering to strictly follow the OpenGL
> -       * spec.
> -       *
> -       * So in either case we want to set the bit.  Unfortunately on Ivy
> -       * Bridge this will get the order close to correct but not perfect.
> -       */

Please keep this comment - it's useful.

> -      uint32_t dw5 =
> -         ((devinfo->max_gs_threads - 1) << max_threads_shift) |
> -         (gs_prog_data->control_data_header_size_hwords <<
> -          GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
> -         ((gs_prog_data->invocations - 1) <<
> -          GEN7_GS_INSTANCE_CONTROL_SHIFT) |
> -         SET_FIELD(vue_prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) |
> -         GEN6_GS_STATISTICS_ENABLE |
> -         (gs_prog_data->include_primitive_id ?
> -          GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
> -         GEN7_GS_REORDER_TRAILING |
> -         GEN7_GS_ENABLE;
> -      uint32_t dw6 = 0;
> -
> -      if (brw->is_haswell) {
> -         dw6 |= gs_prog_data->control_data_format <<
> -            HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
> -      } else {
> -         dw5 |= gs_prog_data->control_data_format <<
> -            IVB_GS_CONTROL_DATA_FORMAT_SHIFT;
> -      }
> -
> -      OUT_BATCH(dw4);
> -      OUT_BATCH(dw5);
> -      OUT_BATCH(dw6);
> -      ADVANCE_BATCH();
> -   } else {
> -      BEGIN_BATCH(7);
> -      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> -      OUT_BATCH(0); /* prog_bo */
> -      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
> -                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> -      OUT_BATCH(0); /* scratch space base offset */
> -      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> -                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> -                GEN7_GS_INCLUDE_VERTEX_HANDLES |
> -                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> -      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
> -                GEN6_GS_STATISTICS_ENABLE);
> -      OUT_BATCH(0);
> -      ADVANCE_BATCH();
> -   }
> -   brw->gs.enabled = active;
> -}
> -
> -const struct brw_tracked_state gen7_gs_state = {
> -   .dirty = {
> -      .mesa  = _NEW_TRANSFORM,
> -      .brw   = BRW_NEW_BATCH |
> -               BRW_NEW_BLORP |
> -               BRW_NEW_CONTEXT |
> -               BRW_NEW_GEOMETRY_PROGRAM |
> -               BRW_NEW_GS_PROG_DATA,
> -   },
> -   .emit = upload_gs_state,
> -};
[snip]
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index d1609f6..d2a936b 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
[snip]
> +static void
> +genX(upload_gs_state)(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   const struct brw_stage_state *stage_state = &brw->gs.base;
> +   /* BRW_NEW_GEOMETRY_PROGRAM */
> +   bool active = brw->geometry_program;
> +
> +   /* BRW_NEW_GS_PROG_DATA */
> +   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
> +   const struct brw_vue_prog_data *vue_prog_data =
> +      brw_vue_prog_data(stage_prog_data);
> +#if GEN_GEN >= 7
> +   const struct brw_gs_prog_data *gs_prog_data =
> +      brw_gs_prog_data(stage_prog_data);
> +#endif
> +
> +   /* _NEW_TRANSFORM */
> +#if GEN_GEN >= 8
> +   struct gl_context *ctx = &brw->ctx;
> +   const struct gl_transform_attrib *transform = &ctx->Transform;
> +#endif

We can delete this block.

> +
> +#if GEN_GEN < 7
> +   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) {
> +      if (active && stage_state->push_const_size != 0) {
> +         cgs.Buffer0Valid = true;
> +         cgs.PointertoGSConstantBuffer0 = stage_state->push_const_offset;
> +         cgs.GSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
> +      }
> +   }
> +#endif
> +
> +   if (active) {
> +      brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
> +         INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
> +
> +#if GEN_GEN >= 7
> +         gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
> +         gs.OutputTopology = gs_prog_data->output_topology;
> +         gs.ControlDataHeaderSize =
> +            gs_prog_data->control_data_header_size_hwords;
> +
> +         gs.InstanceControl = gs_prog_data->invocations - 1;
> +         gs.DispatchMode = vue_prog_data->dispatch_mode;
> +
> +         gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
> +
> +         gs.ControlDataFormat = gs_prog_data->control_data_format;
> +#endif
> +
> +         gs.ReorderMode = TRAILING;
> +         gs.MaximumNumberofThreads =
> +            GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1)
> +                         : (devinfo->max_gs_threads - 1);
> +
> +#if GEN_GEN < 7
> +         gs.SOStatisticsEnable = true;
> +         gs.RenderingEnabled = 1;
> +         if (brw->geometry_program->info.has_transform_feedback_varyings)
> +            gs.SVBIPayloadEnable = true;
> +
> +         /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
> +          * was previously done for gen6.
> +          *
> +          * TODO: test with both disabled to see if the HW is behaving
> +          * as expected, like in gen7.
> +          */
> +         gs.SingleProgramFlow = true;
> +         gs.VectorMaskEnable = true;
> +#endif
> +
> +#if GEN_GEN >= 8
> +         gs.ExpectedVertexCount = gs_prog_data->vertices_in;
> +
> +         if (gs_prog_data->static_vertex_count != -1) {
> +            gs.StaticOutput = true;
> +            gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
> +         }
> +         gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
> +
> +         gs.UserClipDistanceClipTestEnableBitmask =
> +            transform->ClipPlanesEnabled;
> +         gs.UserClipDistanceCullTestEnableBitmask =
> +            vue_prog_data->cull_distance_mask;
> +

Drop ClipTestEnableBitmask (but leave CullTestEnableBitmask) - it was
removed in commit 903056e016e3ea52c2f493f8b0938b519ee40894.

> +         const int urb_entry_write_offset = 1;
> +         const uint32_t urb_entry_output_length =
> +            DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
> +            urb_entry_write_offset;
> +
> +         gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
> +         gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
> +#endif
> +      }
> +#if GEN_GEN < 7
> +   } else if (brw->ff_gs.prog_active)  {
> +      /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS
> +       * program. This function provides the needed 3DSTATE_GS for this.
> +       */
> +      upload_gs_state_for_tf(brw);
> +#endif
> +   } else {
> +      brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
> +         gs.StatisticsEnable = true;
> +#if GEN_GEN < 7
> +         gs.RenderingEnabled = true;
> +#endif
> +
> +#if GEN_GEN < 8
> +         gs.DispatchGRFStartRegisterForURBData = 1;
> +#if GEN_GEN >= 7
> +         gs.IncludeVertexHandles = true;
> +#endif
> +#endif
> +      }
> +   }
> +#if GEN_GEN < 7
> +   brw->gs.enabled = active;
> +#endif
> +}
> +
> +static const struct brw_tracked_state genX(gs_state) = {
> +   .dirty = {
> +      .mesa  = (GEN_GEN < 8 ? _NEW_TRANSFORM : 0) |

We can drop _NEW_TRANSFORM.

> +               (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
> +      .brw   = BRW_NEW_BATCH |
> +               BRW_NEW_BLORP |
> +               BRW_NEW_CONTEXT |
> +               BRW_NEW_GEOMETRY_PROGRAM |
> +               BRW_NEW_GS_PROG_DATA |
> +               (GEN_GEN < 7 ? BRW_NEW_FF_GS_PROG_DATA |
> +                              BRW_NEW_PUSH_CONSTANT_ALLOCATION
> +                            : 0),
> +   },
> +   .emit = genX(upload_gs_state),
> +};
> +
>  #endif
>  
>  /* ---------------------------------------------------------------------- */
> @@ -1645,6 +1985,99 @@ static const struct brw_tracked_state genX(ps_state) = {
>     .emit = genX(upload_ps),
>  };
>  
> +/* ---------------------------------------------------------------------- */
> +
> +static void
> +genX(upload_hs_state)(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   struct brw_stage_state *stage_state = &brw->tcs.base;
> +   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
> +   const struct brw_vue_prog_data *vue_prog_data =
> +      brw_vue_prog_data(stage_prog_data);
> +
> +   /* BRW_NEW_TES_PROG_DATA */
> +   struct brw_tcs_prog_data *tcs_prog_data =
> +      brw_tcs_prog_data(stage_prog_data);
> +
> +   if (!tcs_prog_data) {
> +      brw_batch_emit(brw, GENX(3DSTATE_HS), hs);
> +   } else {
> +      brw_batch_emit(brw, GENX(3DSTATE_HS), hs) {
> +         INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
> +
> +         hs.InstanceCount = tcs_prog_data->instances - 1;
> +         hs.IncludeVertexHandles = true;
> +
> +        hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;

Indentation is off here.

> +      }
> +   }
> +}
> +
> +static const struct brw_tracked_state genX(hs_state) = {
> +   .dirty = {
> +      .mesa  = 0,
> +      .brw   = BRW_NEW_BATCH |
> +               BRW_NEW_BLORP |
> +               BRW_NEW_TCS_PROG_DATA |
> +               BRW_NEW_TESS_PROGRAMS,
> +   },
> +   .emit = genX(upload_hs_state),
> +};
> +
> +static void
> +genX(upload_ds_state)(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   const struct brw_stage_state *stage_state = &brw->tes.base;
> +   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
> +
> +   /* BRW_NEW_TES_PROG_DATA */
> +   const struct brw_tes_prog_data *tes_prog_data =
> +      brw_tes_prog_data(stage_prog_data);
> +   const struct brw_vue_prog_data *vue_prog_data =
> +      brw_vue_prog_data(stage_prog_data);
> +
> +#if GEN_GEN >= 8
> +   /* _NEW_TRANSFORM */
> +   struct gl_context *ctx = &brw->ctx;
> +   const struct gl_transform_attrib *transform = &ctx->Transform;
> +#endif
> +
> +   if (!tes_prog_data) {
> +      brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
> +   } else {
> +      brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
> +         INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
> +
> +        ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
> +        ds.ComputeWCoordinateEnable =
> +           tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
> +
> +#if GEN_GEN >= 8
> +        if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
> +           ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
> +        ds.UserClipDistanceClipTestEnableBitmask =
> +            transform->ClipPlanesEnabled;
> +        ds.UserClipDistanceCullTestEnableBitmask =
> +            vue_prog_data->cull_distance_mask;

Drop ClipTestEnableBitmask (but leave CullTestEnableBitmask) - it was
removed in commit 903056e016e3ea52c2f493f8b0938b519ee40894.  That
means we can drop the _NEW_TRANSFORM stuff above, and the dirty bit.

> +#endif
> +      }
> +   }
> +}
> +
> +static const struct brw_tracked_state genX(ds_state) = {
> +   .dirty = {
> +      .mesa  = (GEN_GEN < 8 ? _NEW_TRANSFORM : 0),

(This can just be 0.  We were setting it on Gen7-7.5, but there's no
reason for that.  I'll send a patch...)

> +      .brw   = BRW_NEW_BATCH |
> +               BRW_NEW_BLORP |
> +               BRW_NEW_TESS_PROGRAMS |
> +               BRW_NEW_TES_PROG_DATA |
> +               (GEN_GEN < 8 ? BRW_NEW_CONTEXT : 0),

There's no point in BRW_NEW_CONTEXT - BRW_NEW_BATCH already covers
100% of those cases.

> +   },
> +   .emit = genX(upload_ds_state),
> +};
> +
>  #endif
>  
>  /* ---------------------------------------------------------------------- */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170427/a13fa346/attachment-0001.sig>


More information about the mesa-dev mailing list