[Mesa-dev] [PATCH v4 6/6] i965: gl_BaseVertex must be zero for non-indexed draw calls
Jason Ekstrand
jason at jlekstrand.net
Tue Apr 10 16:26:26 UTC 2018
On Tue, Apr 10, 2018 at 1:28 AM, Antia Puentes <apuentes at igalia.com> wrote:
> On 07/04/18 08:21, Jason Ekstrand wrote:
>
> On Fri, Apr 6, 2018 at 2:53 PM, Ian Romanick <idr at freedesktop.org> wrote:
>
>> From: Antia Puentes <apuentes at igalia.com>
>>
>> We keep 'firstvertex' as it is and move gl_BaseVertex to the drawID
>> vertex element. The previous Vertex Elements order was:
>>
>> * VE 1: <BaseVertex/firstvertex, BaseInstance, VertexID, InstanceID>
>> * VE 2: <Draw ID, 0, 0, 0>
>>
>> and now it is:
>>
>> * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
>> * VE 2: <Draw ID, BaseVertex, 0, 0>
>>
>> To move the BaseVertex keeping VE1 as it is, allows to keep pointing the
>> vertex buffer associated to VE 1 to the indirect buffer for indirect
>> draw calls.
>>
>> From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification:
>>
>> "gl_BaseVertex holds the integer value passed to the baseVertex
>> parameter to the command that resulted in the current shader
>> invocation. In the case where the command has no baseVertex parameter,
>> the value of gl_BaseVertex is zero."
>>
>> Fixes CTS tests:
>>
>> * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysParameters
>> * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysInstan
>> cedParameters
>> * KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysParameters
>> * KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysI
>> ndirectParameters
>> * KHR-GL45.shader_draw_parameters_tests.MultiDrawArraysIndirec
>> tCountParameters
>>
>> v2 (idr): Make changes to brw_prepare_shader_draw_parameters matching
>> those in genX(emit_vertices). Reformat commit message to 72 columns.
>>
>> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102678
>> ---
>> src/intel/compiler/brw_nir.c | 14 +++++----
>> src/intel/compiler/brw_vec4.cpp | 14 +++++----
>> src/mesa/drivers/dri/i965/brw_context.h | 32 ++++++++++++++-----
>> src/mesa/drivers/dri/i965/brw_draw.c | 45
>> ++++++++++++++++++---------
>> src/mesa/drivers/dri/i965/brw_draw_upload.c | 14 ++++-----
>> src/mesa/drivers/dri/i965/genX_state_upload.c | 38
>> +++++++++++-----------
>> 6 files changed, 97 insertions(+), 60 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
>> index 16b0d86814f..16ab529737b 100644
>> --- a/src/intel/compiler/brw_nir.c
>> +++ b/src/intel/compiler/brw_nir.c
>> @@ -238,8 +238,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>> */
>> const bool has_sgvs =
>> nir->info.system_values_read &
>> - (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>> - BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>> + (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>> BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>> BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>> BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
>> @@ -279,7 +278,6 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>>
>> nir_intrinsic_set_base(load, num_inputs);
>> switch (intrin->intrinsic) {
>> - case nir_intrinsic_load_base_vertex:
>> case nir_intrinsic_load_first_vertex:
>> nir_intrinsic_set_component(load, 0);
>> break;
>> @@ -293,11 +291,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>> nir_intrinsic_set_component(load, 3);
>> break;
>> case nir_intrinsic_load_draw_id:
>> - /* gl_DrawID is stored right after gl_VertexID and
>> friends
>> - * if any of them exist.
>> + case nir_intrinsic_load_base_vertex:
>> + /* gl_DrawID and gl_BaseVertex are stored right after
>> + gl_VertexID and friends if any of them exist.
>> */
>> nir_intrinsic_set_base(load, num_inputs + has_sgvs);
>> - nir_intrinsic_set_component(load, 0);
>> + if (intrin->intrinsic == nir_intrinsic_load_draw_id)
>> + nir_intrinsic_set_component(load, 0);
>> + else
>> + nir_intrinsic_set_component(load, 1);
>> break;
>> default:
>> unreachable("Invalid system value intrinsic");
>> diff --git a/src/intel/compiler/brw_vec4.cpp
>> b/src/intel/compiler/brw_vec4.cpp
>> index 1e384f5bf4d..d33caefdea9 100644
>> --- a/src/intel/compiler/brw_vec4.cpp
>> +++ b/src/intel/compiler/brw_vec4.cpp
>> @@ -2825,14 +2825,19 @@ brw_compile_vs(const struct brw_compiler
>> *compiler, void *log_data,
>> * incoming vertex attribute. So, add an extra slot.
>> */
>> if (shader->info.system_values_read &
>> - (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>> - BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>> + (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>> BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>> BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>> BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
>> nr_attribute_slots++;
>> }
>>
>> + if (shader->info.system_values_read &
>> + (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>> + BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) {
>> + nr_attribute_slots++;
>> + }
>> +
>> if (shader->info.system_values_read &
>> BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX))
>> prog_data->uses_basevertex = true;
>> @@ -2853,12 +2858,9 @@ brw_compile_vs(const struct brw_compiler
>> *compiler, void *log_data,
>> BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))
>> prog_data->uses_instanceid = true;
>>
>> - /* gl_DrawID has its very own vec4 */
>> if (shader->info.system_values_read &
>> - BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
>> + BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))
>> prog_data->uses_drawid = true;
>> - nr_attribute_slots++;
>> - }
>>
>> /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB
>> Entry
>> * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically,
>> in
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index c65a22c38bb..6758b34f47e 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -898,20 +898,36 @@ struct brw_context
>> } params;
>>
>> /**
>> - * Buffer and offset used for GL_ARB_shader_draw_parameters
>> - * (for now, only gl_BaseVertex).
>> + * Buffer and offset used for GL_ARB_shader_draw_parameters which
>> will
>> + * point to the indirect buffer for indirect draw calls.
>> */
>> struct brw_bo *draw_params_bo;
>> uint32_t draw_params_offset;
>>
>> + struct {
>> + /**
>> + * The value of gl_DrawID for the current _mesa_prim. This
>> always comes
>> + * in from it's own vertex buffer since it's not part of the
>> indirect
>> + * draw parameters.
>> + */
>> + int gl_drawid;
>> +
>> + /**
>> + * The value of gl_BaseVertex for the current _mesa_prim.
>> Although
>> + * gl_BaseVertex is part of the indirect buffer for indexed
>> draw calls,
>> + * that is not longer the case for non-indexed draw calls,
>> where it must
>> + * be zero, so we store it in a different buffer.
>> + */
>> + int gl_basevertex;
>> + } derived_params;
>> +
>> /**
>> - * The value of gl_DrawID for the current _mesa_prim. This always
>> comes
>> - * in from it's own vertex buffer since it's not part of the
>> indirect
>> - * draw parameters.
>> + * Buffer and offset used for GL_ARB_shader_draw_parameters which
>> contains
>> + * parameters that are not present in the indirect buffer. They
>> will go in
>> + * their own vertex element.
>> */
>> - int gl_drawid;
>> - struct brw_bo *draw_id_bo;
>> - uint32_t draw_id_offset;
>> + struct brw_bo *derived_draw_params_bo;
>> + uint32_t derived_draw_params_offset;
>>
>> /**
>> * Pointer to the the buffer storing the indirect draw parameters.
>> It
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c
>> b/src/mesa/drivers/dri/i965/brw_draw.c
>> index f51f083178e..e9ec8d585d2 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
>> @@ -241,6 +241,14 @@ brw_emit_prim(struct brw_context *brw,
>> prim->indirect_offset + 12);
>> brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
>> prim->indirect_offset + 16);
>> +
>> + /* Store the gl_BaseVertex value in its vertex buffer */
>> + if (brw->draw.derived_draw_params_bo != NULL) {
>> + brw_store_register_mem32(brw, brw->draw.derived_draw_params_
>> bo,
>> + GEN7_3DPRIM_BASE_VERTEX,
>> + brw->draw.derived_draw_params_offset
>> + 4);
>> + brw_emit_mi_flush(brw);
>> + }
>>
>
> Oh, boy, this is tricky... First of all, it's a bit of a bummer that we
> can't just load the indirect buffer again for this. Not too much to do
> about it, I guess.
>
> Second, there be very scary dragons here. It turns out that, at least on
> Haswell (and possibly other platforms), reading from state registers while
> rendering is in-flight can lead to GPU hangs. Yes, I said "reading". We
> found this out the hard way while working on Vulkan indirect clear colors.
> The better thing to do here would be to use GPRs when available (I think
> they're safe but I'm not sure) or to do a MI_COPY_MEM_MEM which, of course,
> is only available on gen8+. On Ivy Bridge (and haswell if we're going to
> do a store_register_mem from a state register), we need to do a mi_flush
> *before* the store as well.
>
>
>
> I see that this is complicated, I have thought in a different way to
> implement this.
> Instead of moving gl_BaseVertex to a VE2 and reading its value from state
> registers:
>
> - VE1 remains as: <firstvertex, BaseInstance, VertexID, InstanceID>
> -> Patches 1-5 are still valid (I think) and we can still calculate the
> VertexID as FirstVertex + VertexIDZeroBased.
>
> - VE2 contains: <Draw ID, IsIndexedDraw, 0, 0>,
> ->when asked for glBaseVertex (nir_instrinsic_load_base_vertex), we would
> return the value stored in FirstVertex is the draw call is indexed, zero if
> it is not.
>
> How does it sound?.
>
That sounds fine to me. It's one extra instruciton in the shader (it could
be an AND if IsIndexedDraw is 0/~0) and lets us avoid trying to do
command-stream copying of data. If I understand correcctly, Vulkan will
never hit this path because it will always use FIRST_VERETX and not
BASE_VERTEX. Is that correct?
> } else {
>> brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
>> prim->indirect_offset + 12);
>> @@ -815,7 +823,7 @@ brw_draw_single_prim(struct gl_context *ctx,
>> }
>>
>> /* Determine if we need to flag BRW_NEW_VERTICES for updating the
>> - * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw,
>> we
>> + * firstvertex or gl_BaseInstanceARB values. For indirect draw, we
>> * always flag if the shader uses one of the values. For direct draws,
>> * we only flag if the values change.
>> */
>> @@ -825,16 +833,12 @@ brw_draw_single_prim(struct gl_context *ctx,
>> const struct brw_vs_prog_data *vs_prog_data =
>> brw_vs_prog_data(brw->vs.base.prog_data);
>> if (prim_id > 0) {
>> - const bool uses_firstvertex =
>> - vs_prog_data->uses_basevertex ||
>> - vs_prog_data->uses_firstvertex;
>> -
>> const bool uses_draw_parameters =
>> - uses_firstvertex ||
>> + vs_prog_data->uses_firstvertex ||
>> vs_prog_data->uses_baseinstance;
>>
>> if ((uses_draw_parameters && prim->is_indirect) ||
>> - (uses_firstvertex &&
>> + (vs_prog_data->uses_firstvertex &&
>> brw->draw.params.firstvertex != new_firstvertex) ||
>> (vs_prog_data->uses_baseinstance &&
>> brw->draw.params.gl_baseinstance != new_baseinstance))
>> @@ -861,17 +865,28 @@ brw_draw_single_prim(struct gl_context *ctx,
>> }
>>
>> /* gl_DrawID always needs its own vertex buffer since it's not part of
>> - * the indirect parameter buffer. If the program uses gl_DrawID we
>> need
>> - * to flag BRW_NEW_VERTICES. For the first iteration, we don't have
>> - * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
>> - * the loop.
>> + * the indirect parameter buffer. This is the same for gl_BaseVertex,
>> which
>> + * is not part of the indirect parameter buffer for non-indexed draw
>> calls.
>> + * If the program uses gl_DrawID or, uses gl_BaseVertex and it is an
>> indirect
>> + * draw call or the value has changed, we need to flag
>> BRW_NEW_VERTICES.
>> + * For the first iteration, we don't have valid vs_prog_data, but we
>> always
>> + * flag BRW_NEW_VERTICES before the loop.
>> */
>> - brw->draw.gl_drawid = prim->draw_id;
>> - brw_bo_unreference(brw->draw.draw_id_bo);
>> - brw->draw.draw_id_bo = NULL;
>> - if (prim_id > 0 && vs_prog_data->uses_drawid)
>> + const int new_basevertex = prim->indexed ? prim->basevertex : 0;
>> + if (prim_id > 0 &&
>> + (vs_prog_data->uses_drawid ||
>> + (vs_prog_data->uses_basevertex &&
>> + (prim->is_indirect ||
>> + brw->draw.derived_params.gl_basevertex != new_basevertex))))
>> brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>>
>> + brw->draw.derived_params.gl_drawid = prim->draw_id;
>> + brw->draw.derived_params.gl_basevertex = new_basevertex;
>> +
>> + brw_bo_unreference(brw->draw.derived_draw_params_bo);
>> + brw->draw.derived_draw_params_bo = NULL;
>> + brw->draw.derived_draw_params_offset = 0;
>> +
>> if (devinfo->gen < 6)
>> brw_set_prim(brw, prim);
>> else
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> index 7573f780f23..54ba951d6fd 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> @@ -704,11 +704,11 @@ brw_prepare_shader_draw_parameters(struct
>> brw_context *brw)
>> const struct brw_vs_prog_data *vs_prog_data =
>> brw_vs_prog_data(brw->vs.base.prog_data);
>>
>> - const bool uses_firstvertex =
>> - vs_prog_data->uses_basevertex || vs_prog_data->uses_firstvertex;
>> + const bool uses_derived_draw_params =
>> + vs_prog_data->uses_basevertex || vs_prog_data->uses_drawid;
>>
>> /* For non-indirect draws, upload the shader draw parameters */
>> - if ((uses_firstvertex || vs_prog_data->uses_baseinstance) &&
>> + if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance)
>> &&
>> brw->draw.draw_params_bo == NULL) {
>> brw_upload_data(&brw->upload,
>> &brw->draw.params, sizeof(brw->draw.params), 4,
>> @@ -716,11 +716,11 @@ brw_prepare_shader_draw_parameters(struct
>> brw_context *brw)
>> &brw->draw.draw_params_offset);
>> }
>>
>> - if (vs_prog_data->uses_drawid) {
>> + if (uses_derived_draw_params) {
>> brw_upload_data(&brw->upload,
>> - &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid),
>> 4,
>> - &brw->draw.draw_id_bo,
>> - &brw->draw.draw_id_offset);
>> + &brw->draw.derived_params,
>> sizeof(brw->draw.derived_params), 4,
>> + &brw->draw.derived_draw_params_bo,
>> + &brw->draw.derived_draw_params_offset);
>> }
>> }
>>
>> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c
>> b/src/mesa/drivers/dri/i965/genX_state_upload.c
>> index 1a32c60ae34..8323446b4ac 100644
>> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
>> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
>> @@ -539,16 +539,14 @@ genX(emit_vertices)(struct brw_context *brw)
>> }
>> #endif
>>
>> - const bool uses_firstvertex =
>> - vs_prog_data->uses_basevertex || vs_prog_data->uses_firstvertex;
>> -
>> - const bool needs_sgvs_element = (uses_firstvertex ||
>> + const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
>> vs_prog_data->uses_baseinstance ||
>> vs_prog_data->uses_instanceid ||
>> vs_prog_data->uses_vertexid);
>>
>> unsigned nr_elements =
>> - brw->vb.nr_enabled + needs_sgvs_element +
>> vs_prog_data->uses_drawid;
>> + brw->vb.nr_enabled + needs_sgvs_element +
>> + (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex);
>>
>> #if GEN_GEN < 8
>> /* If any of the formats of vb.enabled needs more that one upload, we
>> need
>> @@ -589,10 +587,15 @@ genX(emit_vertices)(struct brw_context *brw)
>>
>> /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS
>> packets. */
>> const bool uses_draw_params =
>> - uses_firstvertex ||
>> + vs_prog_data->uses_firstvertex ||
>> vs_prog_data->uses_baseinstance;
>> +
>> + const bool uses_derived_draw_params =
>> + vs_prog_data->uses_drawid ||
>> + vs_prog_data->uses_basevertex;
>> +
>> const unsigned nr_buffers = brw->vb.nr_buffers +
>> - uses_draw_params + vs_prog_data->uses_drawid;
>> + uses_draw_params + uses_derived_draw_params;
>>
>> if (nr_buffers) {
>> assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
>> @@ -626,11 +629,11 @@ genX(emit_vertices)(struct brw_context *brw)
>> 0 /* step rate */);
>> }
>>
>> - if (vs_prog_data->uses_drawid) {
>> + if (uses_derived_draw_params) {
>> dw = genX(emit_vertex_buffer_state)(brw, dw,
>> brw->vb.nr_buffers + 1,
>> - brw->draw.draw_id_bo,
>> - brw->draw.draw_id_offset,
>> - brw->draw.draw_id_bo->size,
>> + brw->draw.derived_draw_
>> params_bo,
>> + brw->draw.derived_draw_
>> params_offset,
>> + brw->draw.derived_draw_
>> params_bo->size,
>> 0 /* stride */,
>> 0 /* step rate */);
>> }
>> @@ -772,7 +775,7 @@ genX(emit_vertices)(struct brw_context *brw)
>> };
>>
>> #if GEN_GEN >= 8
>> - if (uses_firstvertex ||
>> + if (vs_prog_data->uses_firstvertex ||
>> vs_prog_data->uses_baseinstance) {
>> elem_state.VertexBufferIndex = brw->vb.nr_buffers;
>> elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
>> @@ -782,11 +785,10 @@ genX(emit_vertices)(struct brw_context *brw)
>> #else
>> elem_state.VertexBufferIndex = brw->vb.nr_buffers;
>> elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
>> - if (uses_firstvertex)
>> + if (vs_prog_data->uses_firstvertex ||
>> vs_prog_data->uses_baseinstance) {
>> elem_state.Component0Control = VFCOMP_STORE_SRC;
>> -
>> - if (vs_prog_data->uses_baseinstance)
>> elem_state.Component1Control = VFCOMP_STORE_SRC;
>> + }
>>
>> if (vs_prog_data->uses_vertexid)
>> elem_state.Component2Control = VFCOMP_STORE_VID;
>> @@ -799,13 +801,13 @@ genX(emit_vertices)(struct brw_context *brw)
>> dw += GENX(VERTEX_ELEMENT_STATE_length);
>> }
>>
>> - if (vs_prog_data->uses_drawid) {
>> + if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
>> struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
>> .Valid = true,
>> .VertexBufferIndex = brw->vb.nr_buffers + 1,
>> - .SourceElementFormat = ISL_FORMAT_R32_UINT,
>> + .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
>> .Component0Control = VFCOMP_STORE_SRC,
>> - .Component1Control = VFCOMP_STORE_0,
>> + .Component1Control = VFCOMP_STORE_SRC,
>> .Component2Control = VFCOMP_STORE_0,
>> .Component3Control = VFCOMP_STORE_0,
>> #if GEN_GEN < 5
>> --
>> 2.14.3
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
>
> _______________________________________________
> mesa-dev mailing listmesa-dev at lists.freedesktop.orghttps://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180410/80a454c1/attachment-0001.html>
More information about the mesa-dev
mailing list