[Mesa-dev] [PATCH 4/7] i965: Add support for gl_BaseVertexARB and gl_BaseInstanceARB
Ian Romanick
idr at freedesktop.org
Tue Dec 15 14:27:29 PST 2015
On 12/15/2015 11:48 AM, Anuj Phogat wrote:
> On Tue, Dec 15, 2015 at 12:28 AM, Kristian Høgsberg Kristensen
> <krh at bitplanet.net> wrote:
>> We already have gl_BaseVertexARB in the .x component of the SGVS vec4
>> and plug gl_BaseInstanceARB into the last free component (.y).
>> ---
>> src/mesa/drivers/dri/i965/brw_compiler.h | 2 ++
>> src/mesa/drivers/dri/i965/brw_context.h | 9 ++++--
>> src/mesa/drivers/dri/i965/brw_draw.c | 12 ++++++--
>> src/mesa/drivers/dri/i965/brw_draw_upload.c | 35 ++++++++++++++---------
>> src/mesa/drivers/dri/i965/brw_fs.cpp | 3 +-
>> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 10 ++++++-
>> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 +++-
>> src/mesa/drivers/dri/i965/brw_vec4.cpp | 12 ++++++--
>> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 ++++++-
>> src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 6 +++-
>> src/mesa/drivers/dri/i965/gen8_draw_upload.c | 35 ++++++++++++++---------
>> 11 files changed, 102 insertions(+), 38 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
>> index 218d9c7..58ee966 100644
>> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
>> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
>> @@ -547,6 +547,8 @@ struct brw_vs_prog_data {
>>
>> bool uses_vertexid;
>> bool uses_instanceid;
>> + bool uses_basevertex;
>> + bool uses_baseinstance;
> Missed bool uses_drawid ?
It looks like there may be some rebase or patch splitting issues. These
are added in the next patch, but they are already used in this patch. I
think it will compile after patch 5, but I don't think it will compile
after patch 4.
>> };
>>
>> struct brw_tcs_prog_data
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>> index a845541..1378402 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -905,8 +905,13 @@ struct brw_context
>> uint32_t pma_stall_bits;
>>
>> struct {
>> - /** The value of gl_BaseVertex for the current _mesa_prim. */
>> - int gl_basevertex;
>> + struct {
>> + /** The value of gl_BaseVertex for the current _mesa_prim. */
>> + int gl_basevertex;
>> +
>> + /** The value of gl_BaseInstance for the current _mesa_prim. */
>> + int gl_baseinstance;
>> + } params;
> Missed gl_drawid and gl_drawid_bo ?
>>
>> /**
>> * Buffer and offset used for GL_ARB_shader_draw_parameters
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
>> index 8398471..298ac06 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
>> @@ -491,9 +491,9 @@ brw_try_draw_prims(struct gl_context *ctx,
>> }
>> }
>>
>> - brw->draw.gl_basevertex =
>> + brw->draw.params.gl_basevertex =
>> prims[i].indexed ? prims[i].basevertex : prims[i].start;
>> -
>> + brw->draw.params.gl_baseinstance = prims[i].base_instance;
>> drm_intel_bo_unreference(brw->draw.draw_params_bo);
>>
>> if (prims[i].is_indirect) {
>> @@ -511,6 +511,14 @@ brw_try_draw_prims(struct gl_context *ctx,
>> brw->draw.draw_params_offset = 0;
>> }
>>
>> + /* gl_DrawID always needs its own vertex buffer since it's not part of
>> + * the indirect parameter buffer. */
>> + if (brw->vs.prog_data->uses_drawid) {
>> + brw->draw.gl_drawid = prims[i].drawid;
> brw->draw.gl_drawid = prims[i].draw_id;
>> + drm_intel_bo_unreference(brw->draw.draw_id_bo);
>> + brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>> + }
>> +
>> if (brw->gen < 6)
>> brw_set_prim(brw, &prims[i]);
>> else
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> index ea0f6f2..ccf963c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> @@ -592,8 +592,10 @@ void
>> brw_prepare_shader_draw_parameters(struct brw_context *brw)
>> {
>> /* For non-indirect draws, upload gl_BaseVertex. */
>> - if (brw->vs.prog_data->uses_vertexid && brw->draw.draw_params_bo == NULL) {
>> - intel_upload_data(brw, &brw->draw.gl_basevertex, 4, 4,
>> + if ((brw->vs.prog_data->uses_basevertex ||
>> + brw->vs.prog_data->uses_baseinstance) &&
>> + brw->draw.draw_params_bo == NULL) {
>> + intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
>> &brw->draw.draw_params_bo,
>> &brw->draw.draw_params_offset);
>> }
>> @@ -658,7 +660,8 @@ brw_emit_vertices(struct brw_context *brw)
>> brw_emit_query_begin(brw);
>>
>> unsigned nr_elements = brw->vb.nr_enabled;
>> - if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
>> + if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
>> + brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance)
>> ++nr_elements;
>>
>> /* If the VS doesn't read any inputs (calculating vertex position from
>> @@ -693,8 +696,10 @@ brw_emit_vertices(struct brw_context *brw)
>> /* Now emit VB and VEP state packets.
>> */
>>
>> - unsigned nr_buffers =
>> - brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
>> + const bool uses_draw_params =
>> + brw->vs.prog_data->uses_basevertex ||
>> + brw->vs.prog_data->uses_baseinstance;
>> + const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
>>
>> if (nr_buffers) {
>> if (brw->gen >= 6) {
>> @@ -713,7 +718,7 @@ brw_emit_vertices(struct brw_context *brw)
>>
>> }
>>
>> - if (brw->vs.prog_data->uses_vertexid) {
>> + if (uses_draw_params) {
>> EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
>> brw->draw.draw_params_bo,
>> brw->draw.draw_params_bo->size - 1,
>> @@ -790,21 +795,25 @@ brw_emit_vertices(struct brw_context *brw)
>> ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
>> }
>>
>> - if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
>> + if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
>> + brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance) {
>> uint32_t dw0 = 0, dw1 = 0;
>> uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
>> uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
>> uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
>> uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;
>>
>> - if (brw->vs.prog_data->uses_vertexid) {
>> + if (brw->vs.prog_data->uses_basevertex)
>> comp0 = BRW_VE1_COMPONENT_STORE_SRC;
>> +
>> + if (brw->vs.prog_data->uses_baseinstance)
>> + comp1 = BRW_VE1_COMPONENT_STORE_SRC;
>> +
>> + if (brw->vs.prog_data->uses_vertexid)
>> comp2 = BRW_VE1_COMPONENT_STORE_VID;
>> - }
>>
>> - if (brw->vs.prog_data->uses_instanceid) {
>> + if (brw->vs.prog_data->uses_instanceid)
>> comp3 = BRW_VE1_COMPONENT_STORE_IID;
>> - }
>>
>> dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
>> (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
>> @@ -814,11 +823,11 @@ brw_emit_vertices(struct brw_context *brw)
>> if (brw->gen >= 6) {
>> dw0 |= GEN6_VE0_VALID |
>> brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
>> - BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
>> + BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
>> } else {
>> dw0 |= BRW_VE0_VALID |
>> brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
>> - BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
>> + BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
>> dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
>> }
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 5e8acec..918bc0f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -1671,7 +1671,8 @@ fs_visitor::assign_vs_urb_setup()
>>
>> assert(stage == MESA_SHADER_VERTEX);
>> int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
>> - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
>> + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
>> + vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
>> count++;
>>
>> /* Each attribute is 4 regs. */
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index db38f61..2e4937e 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -221,6 +221,13 @@ emit_system_values_block(nir_block *block, void *void_visitor)
>> *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
>> break;
>>
>> + case nir_intrinsic_load_base_instance:
>> + assert(v->stage == MESA_SHADER_VERTEX);
>> + reg = &v->nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
>> + if (reg->file == BAD_FILE)
>> + *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
>> + break;
>> +
>> case nir_intrinsic_load_invocation_id:
>> assert(v->stage == MESA_SHADER_GEOMETRY);
>> reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
>> @@ -1731,7 +1738,8 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
>>
>> case nir_intrinsic_load_vertex_id_zero_base:
>> case nir_intrinsic_load_base_vertex:
>> - case nir_intrinsic_load_instance_id: {
>> + case nir_intrinsic_load_instance_id:
>> + case nir_intrinsic_load_base_instance: {
>> gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
>> fs_reg val = nir_system_values[sv];
>> assert(val.file != BAD_FILE);
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index d5193a9..ae98de1 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -43,7 +43,11 @@ fs_visitor::emit_vs_system_value(int location)
>> switch (location) {
>> case SYSTEM_VALUE_BASE_VERTEX:
>> reg->reg_offset = 0;
>> - vs_prog_data->uses_vertexid = true;
>> + vs_prog_data->uses_basevertex = true;
>> + break;
>> + case SYSTEM_VALUE_BASE_INSTANCE:
>> + reg->reg_offset = 1;
>> + vs_prog_data->uses_baseinstance = true;
>> break;
>> case SYSTEM_VALUE_VERTEX_ID:
>> unreachable("should have been lowered");
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index a697bdf..71c0f9f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -1570,7 +1570,8 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
>> * don't represent it with a flag in inputs_read, so we call it
>> * VERT_ATTRIB_MAX.
>> */
>> - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
>> + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
>> + vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
>> attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
>> }
>>
>> @@ -1971,11 +1972,18 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
>> * incoming vertex attribute. So, add an extra slot.
>> */
>> if (shader->info.system_values_read &
>> - (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>> + (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>> + BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>> + BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>> BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
>> nr_attributes++;
>> }
>>
>> + /* gl_DrawID has its very own vec4 */
>> + if (shader->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
>> + nr_attributes++;
>> + }
>> +
>> /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
>> * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
>> * vec4 mode, the hardware appears to wedge unless we read something.
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> index f965b39..c6f07d5 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> @@ -78,6 +78,13 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
>> glsl_type::int_type);
>> break;
>>
>> + case nir_intrinsic_load_base_instance:
>> + reg = &nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
>> + if (reg->file == BAD_FILE)
>> + *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_INSTANCE,
>> + glsl_type::int_type);
>> + break;
>> +
>> default:
>> break;
>> }
>> @@ -650,7 +657,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>
>> case nir_intrinsic_load_vertex_id_zero_base:
>> case nir_intrinsic_load_base_vertex:
>> - case nir_intrinsic_load_instance_id: {
>> + case nir_intrinsic_load_instance_id:
>> + case nir_intrinsic_load_base_instance: {
>> gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
>> src_reg val = src_reg(nir_system_values[sv]);
>> assert(val.file != BAD_FILE);
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
>> index fd8be7d..bd6a9a4 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
>> @@ -155,7 +155,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
>> switch (location) {
>> case SYSTEM_VALUE_BASE_VERTEX:
>> reg->writemask = WRITEMASK_X;
>> - vs_prog_data->uses_vertexid = true;
>> + vs_prog_data->uses_basevertex = true;
>> + break;
>> + case SYSTEM_VALUE_BASE_INSTANCE:
>> + reg->writemask = WRITEMASK_Y;
>> + vs_prog_data->uses_baseinstance = true;
>> break;
>> case SYSTEM_VALUE_VERTEX_ID:
>> case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
>> diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
>> index 198d612..451cf0b 100644
>> --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
>> +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
>> @@ -115,7 +115,11 @@ gen8_emit_vertices(struct brw_context *brw)
>> }
>>
>> /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
>> - unsigned nr_buffers = brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
>> + const bool uses_draw_params =
>> + brw->vs.prog_data->uses_basevertex ||
>> + brw->vs.prog_data->uses_baseinstance;
>> + const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
>> +
>> if (nr_buffers) {
>> assert(nr_buffers <= 33);
>>
>> @@ -135,7 +139,7 @@ gen8_emit_vertices(struct brw_context *brw)
>> OUT_BATCH(buffer->bo->size);
>> }
>>
>> - if (brw->vs.prog_data->uses_vertexid) {
>> + if (uses_draw_params) {
>> OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT |
>> GEN7_VB0_ADDRESS_MODIFYENABLE |
>> mocs_wb << 16);
>> @@ -148,16 +152,18 @@ gen8_emit_vertices(struct brw_context *brw)
>>
>> /* Normally we don't need an element for the SGVS attribute because the
>> * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
>> - * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
>> - * vertex ID is used then it needs an element for the base vertex buffer.
>> - * Additionally if there is an edge flag element then the SGVS can't be
>> - * inserted past that so we need a dummy element to ensure that the edge
>> - * flag is the last one.
>> + * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
>> + * we're using draw parameters then we need an element for the those
>> + * values. Additionally if there is an edge flag element then the SGVS
>> + * can't be inserted past that so we need a dummy element to ensure that
>> + * the edge flag is the last one.
>> */
>> - bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
>> - (brw->vs.prog_data->uses_instanceid &&
>> - uses_edge_flag));
>> - unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
>> + const bool needs_sgvs_element = (brw->vs.prog_data->uses_basevertex ||
>> + brw->vs.prog_data->uses_baseinstance ||
>> + ((brw->vs.prog_data->uses_instanceid ||
>> + brw->vs.prog_data->uses_vertexid) &&
>> + uses_edge_flag));
>> + const unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
>>
>> /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
>> * presumably for VertexID/InstanceID.
>> @@ -212,12 +218,13 @@ gen8_emit_vertices(struct brw_context *brw)
>> }
>>
>> if (needs_sgvs_element) {
>> - if (brw->vs.prog_data->uses_vertexid) {
>> + if (brw->vs.prog_data->uses_basevertex ||
>> + brw->vs.prog_data->uses_baseinstance) {
>> OUT_BATCH(GEN6_VE0_VALID |
>> brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
>> - BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
>> + BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT);
>> OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
>> - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
>> + (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) |
>> (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
>> (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
>> } else {
>> --
>> 2.5.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
More information about the mesa-dev
mailing list