[Mesa-dev] [PATCH 5/7] i965: Add support for gl_DrawIDARB and enable extension
Ian Romanick
idr at freedesktop.org
Tue Dec 15 14:39:27 PST 2015
On 12/15/2015 12:28 AM, Kristian Høgsberg Kristensen wrote:
> We have to break open a new vec4 for gl_DrawIDARB. We've used up all
> space in the vec4 we use for SGVS and gl_DrawIDARB has to come from its
> own separate vertex buffer anyway. This is because we point the vb for
> base vertex and base instance into the draw parameter BO for indirect
> draw calls, but the draw id is generated by mesa in a different buffer.
> ---
> src/mesa/drivers/dri/i965/brw_compiler.h | 1 +
> src/mesa/drivers/dri/i965/brw_context.h | 9 +++++
> src/mesa/drivers/dri/i965/brw_draw.c | 8 ++--
> src/mesa/drivers/dri/i965/brw_draw_upload.c | 45 ++++++++++++++++++++++-
> src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 10 ++++-
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 +++++
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 +++-
> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 ++++-
> src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 5 +++
> src/mesa/drivers/dri/i965/gen8_draw_upload.c | 34 ++++++++++++++++-
> src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
> 12 files changed, 132 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
> index 58ee966..2333f4a 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> @@ -549,6 +549,7 @@ struct brw_vs_prog_data {
> bool uses_instanceid;
> bool uses_basevertex;
> bool uses_baseinstance;
> + bool uses_drawid;
> };
>
> struct brw_tcs_prog_data
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 1378402..97ebf06 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -919,6 +919,15 @@ struct brw_context
> */
> drm_intel_bo *draw_params_bo;
> uint32_t draw_params_offset;
> +
> + /**
> + * The value of gl_DrawID for the current _mesa_prim. This always comes
> + * in from it's own vertex buffer since it's not part of the indirect
> + * draw parameters.
> + */
> + int gl_drawid;
> + drm_intel_bo *draw_id_bo;
> + uint32_t draw_id_offset;
> } draw;
>
> struct {
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index 298ac06..b0710c67 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -513,11 +513,9 @@ brw_try_draw_prims(struct gl_context *ctx,
>
> /* gl_DrawID always needs its own vertex buffer since it's not part of
> * the indirect parameter buffer. */
The */ goes on its own line.
> - if (brw->vs.prog_data->uses_drawid) {
> - brw->draw.gl_drawid = prims[i].drawid;
> - drm_intel_bo_unreference(brw->draw.draw_id_bo);
> - brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
> - }
> + brw->draw.gl_drawid = prims[i].draw_id;
> + drm_intel_bo_unreference(brw->draw.draw_id_bo);
> + brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
The previous patch (incorrectly) added this block, and it seems like
this should be conditional on uses_drawid.
>
> if (brw->gen < 6)
> brw_set_prim(brw, &prims[i]);
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index ccf963c..e601190 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -599,6 +599,12 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw)
> &brw->draw.draw_params_bo,
> &brw->draw.draw_params_offset);
> }
> +
> + if (brw->vs.prog_data->uses_drawid) {
> + intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
> + &brw->draw.draw_id_bo,
> + &brw->draw.draw_id_offset);
> + }
> }
>
> /**
> @@ -663,6 +669,8 @@ brw_emit_vertices(struct brw_context *brw)
> if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
> brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance)
> ++nr_elements;
> + if (brw->vs.prog_data->uses_drawid)
> + nr_elements++;
>
> /* If the VS doesn't read any inputs (calculating vertex position from
> * a state variable for some reason, for example), emit a single pad
> @@ -699,7 +707,8 @@ brw_emit_vertices(struct brw_context *brw)
> const bool uses_draw_params =
> brw->vs.prog_data->uses_basevertex ||
> brw->vs.prog_data->uses_baseinstance;
> - const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
> + const unsigned nr_buffers = brw->vb.nr_buffers +
> + uses_draw_params + brw->vs.prog_data->uses_drawid;
>
> if (nr_buffers) {
> if (brw->gen >= 6) {
> @@ -726,6 +735,16 @@ brw_emit_vertices(struct brw_context *brw)
> 0, /* stride */
> 0); /* step rate */
> }
> +
> + if (brw->vs.prog_data->uses_drawid) {
> + EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
> + brw->draw.draw_id_bo,
> + brw->draw.draw_id_bo->size - 1,
> + brw->draw.draw_id_offset,
> + 0, /* stride */
> + 0); /* step rate */
> + }
> +
> ADVANCE_BATCH();
> }
>
> @@ -839,6 +858,30 @@ brw_emit_vertices(struct brw_context *brw)
> OUT_BATCH(dw1);
> }
>
> + if (brw->vs.prog_data->uses_drawid) {
> + uint32_t dw0 = 0, dw1 = 0;
> +
> + dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
> +
> + if (brw->gen >= 6) {
> + dw0 |= GEN6_VE0_VALID |
> + ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
> + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
> + } else {
> + dw0 |= BRW_VE0_VALID |
> + ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) |
> + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
If we restrict this extension to core profile (i.e., only when
ARB_draw_indirect is available), we can delete a bunch of this. We only
do ARB_draw_indirect on GEN7+.
> +
> + dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
Mixed tabs and spaces.
> + }
> +
> + OUT_BATCH(dw0);
> + OUT_BATCH(dw1);
> + }
> +
> if (brw->gen >= 6 && gen6_edgeflag_input) {
> uint32_t format =
> brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 918bc0f..359e138 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1674,6 +1674,8 @@ fs_visitor::assign_vs_urb_setup()
> if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
> vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
> count++;
> + if (vs_prog_data->uses_drawid)
> + count++;
>
> /* Each attribute is 4 regs. */
> this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 2e4937e..9e9c700 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -228,6 +228,13 @@ emit_system_values_block(nir_block *block, void *void_visitor)
> *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
> break;
>
> + case nir_intrinsic_load_draw_id:
> + assert(v->stage == MESA_SHADER_VERTEX);
> + reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID];
> + if (reg->file == BAD_FILE)
> + *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID);
> + break;
> +
> case nir_intrinsic_load_invocation_id:
> assert(v->stage == MESA_SHADER_GEOMETRY);
> reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
> @@ -1739,7 +1746,8 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
> case nir_intrinsic_load_vertex_id_zero_base:
> case nir_intrinsic_load_base_vertex:
> case nir_intrinsic_load_instance_id:
> - case nir_intrinsic_load_base_instance: {
> + case nir_intrinsic_load_base_instance:
> + case nir_intrinsic_load_draw_id: {
> gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
> fs_reg val = nir_system_values[sv];
> assert(val.file != BAD_FILE);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index ae98de1..21492ac 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -59,6 +59,16 @@ fs_visitor::emit_vs_system_value(int location)
> reg->reg_offset = 3;
> vs_prog_data->uses_instanceid = true;
> break;
> + case SYSTEM_VALUE_DRAW_ID:
> + if (nir->info.system_values_read &
> + (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> + BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
> + BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
> + BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID)))
> + reg->nr += 4;
> + reg->reg_offset = 0;
> + vs_prog_data->uses_drawid = true;
> + break;
> default:
> unreachable("not reached");
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 71c0f9f..34f37a3 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -1555,7 +1555,7 @@ int
> vec4_vs_visitor::setup_attributes(int payload_reg)
> {
> int nr_attributes;
> - int attribute_map[VERT_ATTRIB_MAX + 1];
> + int attribute_map[VERT_ATTRIB_MAX + 2];
> memset(attribute_map, 0, sizeof(attribute_map));
>
> nr_attributes = 0;
> @@ -1566,6 +1566,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
> }
> }
>
> + if (vs_prog_data->uses_drawid) {
> + attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
> + nr_attributes++;
> + }
> +
> /* VertexID is stored by the VF as the last vertex element, but we
> * don't represent it with a flag in inputs_read, so we call it
> * VERT_ATTRIB_MAX.
> @@ -1573,6 +1578,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
> if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
> vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
> attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
> + nr_attributes++;
> }
>
> lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index c6f07d5..e3901b8 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -85,6 +85,13 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
> glsl_type::int_type);
> break;
>
> + case nir_intrinsic_load_draw_id:
> + reg = &nir_system_values[SYSTEM_VALUE_DRAW_ID];
> + if (reg->file == BAD_FILE)
> + *reg = *make_reg_for_system_value(SYSTEM_VALUE_DRAW_ID,
> + glsl_type::int_type);
> + break;
> +
> default:
> break;
> }
> @@ -658,7 +665,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> case nir_intrinsic_load_vertex_id_zero_base:
> case nir_intrinsic_load_base_vertex:
> case nir_intrinsic_load_instance_id:
> - case nir_intrinsic_load_base_instance: {
> + case nir_intrinsic_load_base_instance:
> + case nir_intrinsic_load_draw_id: {
> gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
> src_reg val = src_reg(nir_system_values[sv]);
> assert(val.file != BAD_FILE);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
> index bd6a9a4..1d69149 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
> @@ -170,6 +170,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
> reg->writemask = WRITEMASK_W;
> vs_prog_data->uses_instanceid = true;
> break;
> + case SYSTEM_VALUE_DRAW_ID:
> + reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX + 1);
> + reg->writemask = WRITEMASK_X;
> + vs_prog_data->uses_drawid = true;
> + break;
> default:
> unreachable("not reached");
> }
> diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> index 451cf0b..ff89e5f 100644
> --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> @@ -118,7 +118,8 @@ gen8_emit_vertices(struct brw_context *brw)
> const bool uses_draw_params =
> brw->vs.prog_data->uses_basevertex ||
> brw->vs.prog_data->uses_baseinstance;
> - const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
> + const unsigned nr_buffers = brw->vb.nr_buffers +
> + uses_draw_params + brw->vs.prog_data->uses_drawid;
>
> if (nr_buffers) {
> assert(nr_buffers <= 33);
> @@ -147,6 +148,15 @@ gen8_emit_vertices(struct brw_context *brw)
> brw->draw.draw_params_offset);
> OUT_BATCH(brw->draw.draw_params_bo->size);
> }
> +
> + if (brw->vs.prog_data->uses_drawid) {
> + OUT_BATCH((brw->vb.nr_buffers + 1) << GEN6_VB0_INDEX_SHIFT |
> + GEN7_VB0_ADDRESS_MODIFYENABLE |
> + mocs_wb << 16);
> + OUT_RELOC64(brw->draw.draw_id_bo, I915_GEM_DOMAIN_VERTEX, 0,
> + brw->draw.draw_id_offset);
> + OUT_BATCH(brw->draw.draw_id_bo->size);
> + }
> ADVANCE_BATCH();
> }
>
> @@ -163,7 +173,8 @@ gen8_emit_vertices(struct brw_context *brw)
> ((brw->vs.prog_data->uses_instanceid ||
> brw->vs.prog_data->uses_vertexid) &&
> uses_edge_flag));
> - const unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
> + const unsigned nr_elements =
> + brw->vb.nr_enabled + needs_sgvs_element + brw->vs.prog_data->uses_drawid;
>
> /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
> * presumably for VertexID/InstanceID.
> @@ -236,6 +247,16 @@ gen8_emit_vertices(struct brw_context *brw)
> }
> }
>
> + if (brw->vs.prog_data->uses_drawid) {
> + OUT_BATCH(GEN6_VE0_VALID |
> + ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
> + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT));
> + OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> + }
> +
> if (gen6_edgeflag_input) {
> uint32_t format =
> brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> @@ -273,6 +294,15 @@ gen8_emit_vertices(struct brw_context *brw)
> OUT_BATCH(buffer->step_rate);
> ADVANCE_BATCH();
> }
> +
> + if (brw->vs.prog_data->uses_drawid) {
> + const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
> + BEGIN_BATCH(3);
> + OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
> + OUT_BATCH(element);
> + OUT_BATCH(0);
> + ADVANCE_BATCH();
> + }
> }
>
> const struct brw_tracked_state gen8_vertices = {
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 24761a7..c95bfbd 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -203,6 +203,7 @@ intelInitExtensions(struct gl_context *ctx)
> ctx->Extensions.ARB_point_sprite = true;
> ctx->Extensions.ARB_seamless_cube_map = true;
> ctx->Extensions.ARB_shader_bit_encoding = true;
> + ctx->Extensions.ARB_shader_draw_parameters = true;
Depending on what we decide about the ARB_draw_indirect dependency, this
may need to move.
> ctx->Extensions.ARB_shader_texture_lod = true;
> ctx->Extensions.ARB_shadow = true;
> ctx->Extensions.ARB_sync = true;
>
More information about the mesa-dev
mailing list