[Mesa-dev] [PATCH v02 29/37] i965: Port gen6+ state emitting code to genxml.
Kenneth Graunke
kenneth at whitecape.org
Thu Apr 27 07:35:40 UTC 2017
On Monday, April 24, 2017 3:19:24 PM PDT Rafael Antognolli wrote:
[snip]
> diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
> deleted file mode 100644
> index 1b5b782..0000000
> --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
> +++ /dev/null
> @@ -1,168 +0,0 @@
> -/*
> - * Copyright © 2013 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice (including the next
> - * paragraph) shall be included in all copies or substantial portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> - * DEALINGS IN THE SOFTWARE.
> - */
> -
> -#include "brw_context.h"
> -#include "brw_state.h"
> -#include "brw_defines.h"
> -#include "intel_batchbuffer.h"
> -
> -static void
> -upload_gs_state(struct brw_context *brw)
> -{
> - const struct gen_device_info *devinfo = &brw->screen->devinfo;
> - const struct brw_stage_state *stage_state = &brw->gs.base;
> - const int max_threads_shift = brw->is_haswell ?
> - HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT;
> - /* BRW_NEW_GEOMETRY_PROGRAM */
> - bool active = brw->geometry_program;
> - /* BRW_NEW_GS_PROG_DATA */
> - const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
> - const struct brw_vue_prog_data *vue_prog_data =
> - brw_vue_prog_data(stage_state->prog_data);
> - const struct brw_gs_prog_data *gs_prog_data =
> - brw_gs_prog_data(stage_state->prog_data);
> -
> - /**
> - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
> - * Geometry > Geometry Shader > State:
> - *
> - * "Note: Because of corruption in IVB:GT2, software needs to flush the
> - * whole fixed function pipeline when the GS enable changes value in
> - * the 3DSTATE_GS."
> - *
> - * The hardware architects have clarified that in this context "flush the
> - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
> - * Stall" bit set.
> - */
> - if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active)
> - gen7_emit_cs_stall_flush(brw);
You're missing this flush and comment. Please add it back.
> -
> - if (active) {
> - BEGIN_BATCH(7);
> - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> - OUT_BATCH(stage_state->prog_offset);
> - OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
> - GEN6_GS_SAMPLER_COUNT_SHIFT) |
> - ((prog_data->binding_table.size_bytes / 4) <<
> - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> -
> - if (prog_data->total_scratch) {
> - OUT_RELOC(stage_state->scratch_bo,
> - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> - ffs(stage_state->per_thread_scratch) - 11);
> - } else {
> - OUT_BATCH(0);
> - }
> -
> - uint32_t dw4 =
> - ((gs_prog_data->output_vertex_size_hwords * 2 - 1) <<
> - GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
> - (gs_prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
> - (vue_prog_data->urb_read_length <<
> - GEN6_GS_URB_READ_LENGTH_SHIFT) |
> - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
> - (prog_data->dispatch_grf_start_reg <<
> - GEN6_GS_DISPATCH_START_GRF_SHIFT);
> -
> - /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between
> - * Ivy Bridge and Haswell.
> - *
> - * On Ivy Bridge, setting this bit causes the vertices of a triangle
> - * strip to be delivered to the geometry shader in an order that does
> - * not strictly follow the OpenGL spec, but preserves triangle
> - * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then
> - * the geometry shader sees triangles:
> - *
> - * (1, 2, 3), (2, 4, 3), (3, 4, 5)
> - *
> - * (Clearing the bit is even worse, because it fails to preserve
> - * orientation).
> - *
> - * Triangle strips with adjacency always ordered in a way that preserves
> - * triangle orientation but does not strictly follow the OpenGL spec,
> - * regardless of the setting of this bit.
> - *
> - * On Haswell, both triangle strips and triangle strips with adjacency
> - * are always ordered in a way that preserves triangle orientation.
> - * Setting this bit causes the ordering to strictly follow the OpenGL
> - * spec.
> - *
> - * So in either case we want to set the bit. Unfortunately on Ivy
> - * Bridge this will get the order close to correct but not perfect.
> - */
Please keep this comment - it's useful.
> - uint32_t dw5 =
> - ((devinfo->max_gs_threads - 1) << max_threads_shift) |
> - (gs_prog_data->control_data_header_size_hwords <<
> - GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
> - ((gs_prog_data->invocations - 1) <<
> - GEN7_GS_INSTANCE_CONTROL_SHIFT) |
> - SET_FIELD(vue_prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) |
> - GEN6_GS_STATISTICS_ENABLE |
> - (gs_prog_data->include_primitive_id ?
> - GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
> - GEN7_GS_REORDER_TRAILING |
> - GEN7_GS_ENABLE;
> - uint32_t dw6 = 0;
> -
> - if (brw->is_haswell) {
> - dw6 |= gs_prog_data->control_data_format <<
> - HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
> - } else {
> - dw5 |= gs_prog_data->control_data_format <<
> - IVB_GS_CONTROL_DATA_FORMAT_SHIFT;
> - }
> -
> - OUT_BATCH(dw4);
> - OUT_BATCH(dw5);
> - OUT_BATCH(dw6);
> - ADVANCE_BATCH();
> - } else {
> - BEGIN_BATCH(7);
> - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
> - OUT_BATCH(0); /* prog_bo */
> - OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
> - (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
> - OUT_BATCH(0); /* scratch space base offset */
> - OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
> - (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
> - GEN7_GS_INCLUDE_VERTEX_HANDLES |
> - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
> - OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
> - GEN6_GS_STATISTICS_ENABLE);
> - OUT_BATCH(0);
> - ADVANCE_BATCH();
> - }
> - brw->gs.enabled = active;
> -}
> -
> -const struct brw_tracked_state gen7_gs_state = {
> - .dirty = {
> - .mesa = _NEW_TRANSFORM,
> - .brw = BRW_NEW_BATCH |
> - BRW_NEW_BLORP |
> - BRW_NEW_CONTEXT |
> - BRW_NEW_GEOMETRY_PROGRAM |
> - BRW_NEW_GS_PROG_DATA,
> - },
> - .emit = upload_gs_state,
> -};
[snip]
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index d1609f6..d2a936b 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
[snip]
> +static void
> +genX(upload_gs_state)(struct brw_context *brw)
> +{
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> + const struct brw_stage_state *stage_state = &brw->gs.base;
> + /* BRW_NEW_GEOMETRY_PROGRAM */
> + bool active = brw->geometry_program;
> +
> + /* BRW_NEW_GS_PROG_DATA */
> + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
> + const struct brw_vue_prog_data *vue_prog_data =
> + brw_vue_prog_data(stage_prog_data);
> +#if GEN_GEN >= 7
> + const struct brw_gs_prog_data *gs_prog_data =
> + brw_gs_prog_data(stage_prog_data);
> +#endif
> +
> + /* _NEW_TRANSFORM */
> +#if GEN_GEN >= 8
> + struct gl_context *ctx = &brw->ctx;
> + const struct gl_transform_attrib *transform = &ctx->Transform;
> +#endif
We can delete this block.
> +
> +#if GEN_GEN < 7
> + brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) {
> + if (active && stage_state->push_const_size != 0) {
> + cgs.Buffer0Valid = true;
> + cgs.PointertoGSConstantBuffer0 = stage_state->push_const_offset;
> + cgs.GSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
> + }
> + }
> +#endif
> +
> + if (active) {
> + brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
> + INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
> +
> +#if GEN_GEN >= 7
> + gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
> + gs.OutputTopology = gs_prog_data->output_topology;
> + gs.ControlDataHeaderSize =
> + gs_prog_data->control_data_header_size_hwords;
> +
> + gs.InstanceControl = gs_prog_data->invocations - 1;
> + gs.DispatchMode = vue_prog_data->dispatch_mode;
> +
> + gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
> +
> + gs.ControlDataFormat = gs_prog_data->control_data_format;
> +#endif
> +
> + gs.ReorderMode = TRAILING;
> + gs.MaximumNumberofThreads =
> + GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1)
> + : (devinfo->max_gs_threads - 1);
> +
> +#if GEN_GEN < 7
> + gs.SOStatisticsEnable = true;
> + gs.RenderingEnabled = 1;
> + if (brw->geometry_program->info.has_transform_feedback_varyings)
> + gs.SVBIPayloadEnable = true;
> +
> + /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
> + * was previously done for gen6.
> + *
> + * TODO: test with both disabled to see if the HW is behaving
> + * as expected, like in gen7.
> + */
> + gs.SingleProgramFlow = true;
> + gs.VectorMaskEnable = true;
> +#endif
> +
> +#if GEN_GEN >= 8
> + gs.ExpectedVertexCount = gs_prog_data->vertices_in;
> +
> + if (gs_prog_data->static_vertex_count != -1) {
> + gs.StaticOutput = true;
> + gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
> + }
> + gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
> +
> + gs.UserClipDistanceClipTestEnableBitmask =
> + transform->ClipPlanesEnabled;
> + gs.UserClipDistanceCullTestEnableBitmask =
> + vue_prog_data->cull_distance_mask;
> +
Drop ClipTestEnableBitmask (but leave CullTestEnableBitmask) - it was
was removed in commit 903056e016e3ea52c2f493f8b0938b519ee40894.
> + const int urb_entry_write_offset = 1;
> + const uint32_t urb_entry_output_length =
> + DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
> + urb_entry_write_offset;
> +
> + gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
> + gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
> +#endif
> + }
> +#if GEN_GEN < 7
> + } else if (brw->ff_gs.prog_active) {
> + /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS
> + * program. This function provides the needed 3DSTATE_GS for this.
> + */
> + upload_gs_state_for_tf(brw);
> +#endif
> + } else {
> + brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
> + gs.StatisticsEnable = true;
> +#if GEN_GEN < 7
> + gs.RenderingEnabled = true;
> +#endif
> +
> +#if GEN_GEN < 8
> + gs.DispatchGRFStartRegisterForURBData = 1;
> +#if GEN_GEN >= 7
> + gs.IncludeVertexHandles = true;
> +#endif
> +#endif
> + }
> + }
> +#if GEN_GEN < 7
> + brw->gs.enabled = active;
> +#endif
> +}
> +
> +static const struct brw_tracked_state genX(gs_state) = {
> + .dirty = {
> + .mesa = (GEN_GEN < 8 ? _NEW_TRANSFORM : 0) |
We can drop _NEW_TRANSFORM.
> + (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
> + .brw = BRW_NEW_BATCH |
> + BRW_NEW_BLORP |
> + BRW_NEW_CONTEXT |
> + BRW_NEW_GEOMETRY_PROGRAM |
> + BRW_NEW_GS_PROG_DATA |
> + (GEN_GEN < 7 ? BRW_NEW_FF_GS_PROG_DATA |
> + BRW_NEW_PUSH_CONSTANT_ALLOCATION
> + : 0),
> + },
> + .emit = genX(upload_gs_state),
> +};
> +
> #endif
>
> /* ---------------------------------------------------------------------- */
> @@ -1645,6 +1985,99 @@ static const struct brw_tracked_state genX(ps_state) = {
> .emit = genX(upload_ps),
> };
>
> +/* ---------------------------------------------------------------------- */
> +
> +static void
> +genX(upload_hs_state)(struct brw_context *brw)
> +{
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> + struct brw_stage_state *stage_state = &brw->tcs.base;
> + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
> + const struct brw_vue_prog_data *vue_prog_data =
> + brw_vue_prog_data(stage_prog_data);
> +
> + /* BRW_NEW_TES_PROG_DATA */
> + struct brw_tcs_prog_data *tcs_prog_data =
> + brw_tcs_prog_data(stage_prog_data);
> +
> + if (!tcs_prog_data) {
> + brw_batch_emit(brw, GENX(3DSTATE_HS), hs);
> + } else {
> + brw_batch_emit(brw, GENX(3DSTATE_HS), hs) {
> + INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
> +
> + hs.InstanceCount = tcs_prog_data->instances - 1;
> + hs.IncludeVertexHandles = true;
> +
> + hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
Indentation is off here.
> + }
> + }
> +}
> +
> +static const struct brw_tracked_state genX(hs_state) = {
> + .dirty = {
> + .mesa = 0,
> + .brw = BRW_NEW_BATCH |
> + BRW_NEW_BLORP |
> + BRW_NEW_TCS_PROG_DATA |
> + BRW_NEW_TESS_PROGRAMS,
> + },
> + .emit = genX(upload_hs_state),
> +};
> +
> +static void
> +genX(upload_ds_state)(struct brw_context *brw)
> +{
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> + const struct brw_stage_state *stage_state = &brw->tes.base;
> + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
> +
> + /* BRW_NEW_TES_PROG_DATA */
> + const struct brw_tes_prog_data *tes_prog_data =
> + brw_tes_prog_data(stage_prog_data);
> + const struct brw_vue_prog_data *vue_prog_data =
> + brw_vue_prog_data(stage_prog_data);
> +
> +#if GEN_GEN >= 8
> + /* _NEW_TRANSFORM */
> + struct gl_context *ctx = &brw->ctx;
> + const struct gl_transform_attrib *transform = &ctx->Transform;
> +#endif
> +
> + if (!tes_prog_data) {
> + brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
> + } else {
> + brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
> + INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
> +
> + ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
> + ds.ComputeWCoordinateEnable =
> + tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
> +
> +#if GEN_GEN >= 8
> + if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
> + ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
> + ds.UserClipDistanceClipTestEnableBitmask =
> + transform->ClipPlanesEnabled;
> + ds.UserClipDistanceCullTestEnableBitmask =
> + vue_prog_data->cull_distance_mask;
Drop ClipTestEnableBitmask (but leave CullTestEnableBitmask) - it was
was removed in commit 903056e016e3ea52c2f493f8b0938b519ee40894. That
means we can drop the _NEW_TRANSFORM stuff above, and the dirty bit.
> +#endif
> + }
> + }
> +}
> +
> +static const struct brw_tracked_state genX(ds_state) = {
> + .dirty = {
> + .mesa = (GEN_GEN < 8 ? _NEW_TRANSFORM : 0),
(This can just be 0. We were setting it on Gen7-7.5, but there's no
reason for that. I'll send a patch...)
> + .brw = BRW_NEW_BATCH |
> + BRW_NEW_BLORP |
> + BRW_NEW_TESS_PROGRAMS |
> + BRW_NEW_TES_PROG_DATA |
> + (GEN_GEN < 8 ? BRW_NEW_CONTEXT : 0),
There's no point in BRW_NEW_CONTEXT - BRW_NEW_BATCH already covers
100% of those cases.
> + },
> + .emit = genX(upload_ds_state),
> +};
> +
> #endif
>
> /* ---------------------------------------------------------------------- */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170427/a13fa346/attachment-0001.sig>
More information about the mesa-dev
mailing list