[Mesa-dev] [PATCH 5/7] i965: Add support for gl_DrawIDARB and enable extension

Kristian Høgsberg Kristensen kristian.h.kristensen at intel.com
Wed Dec 16 14:15:53 PST 2015


Ian Romanick <idr at freedesktop.org> writes:

> On 12/15/2015 12:28 AM, Kristian Høgsberg Kristensen wrote:
>> We have to break open a new vec4 for gl_DrawIDARB. We've used up all
>> space in the vec4 we use for SGVS and gl_DrawIDARB has to come from its
>> own separate vertex buffer anyway.  This is because we point the vb for
>> base vertex and base instance into the draw parameter BO for indirect
>> draw calls, but the draw id is generated by mesa in a different buffer.
>> ---
>>  src/mesa/drivers/dri/i965/brw_compiler.h          |  1 +
>>  src/mesa/drivers/dri/i965/brw_context.h           |  9 +++++
>>  src/mesa/drivers/dri/i965/brw_draw.c              |  8 ++--
>>  src/mesa/drivers/dri/i965/brw_draw_upload.c       | 45 ++++++++++++++++++++++-
>>  src/mesa/drivers/dri/i965/brw_fs.cpp              |  2 +
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp          | 10 ++++-
>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp      | 10 +++++
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp            |  8 +++-
>>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp        | 10 ++++-
>>  src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  5 +++
>>  src/mesa/drivers/dri/i965/gen8_draw_upload.c      | 34 ++++++++++++++++-
>>  src/mesa/drivers/dri/i965/intel_extensions.c      |  1 +
>>  12 files changed, 132 insertions(+), 11 deletions(-)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
>> index 58ee966..2333f4a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
>> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
>> @@ -549,6 +549,7 @@ struct brw_vs_prog_data {
>>     bool uses_instanceid;
>>     bool uses_basevertex;
>>     bool uses_baseinstance;
>> +   bool uses_drawid;
>>  };
>>  
>>  struct brw_tcs_prog_data
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>> index 1378402..97ebf06 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -919,6 +919,15 @@ struct brw_context
>>         */
>>        drm_intel_bo *draw_params_bo;
>>        uint32_t draw_params_offset;
>> +
>> +      /**
>> +       * The value of gl_DrawID for the current _mesa_prim. This always comes
>> +       * in from it's own vertex buffer since it's not part of the indirect
>> +       * draw parameters.
>> +       */
>> +      int gl_drawid;
>> +      drm_intel_bo *draw_id_bo;
>> +      uint32_t draw_id_offset;
>>     } draw;
>>  
>>     struct {
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
>> index 298ac06..b0710c67 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
>> @@ -513,11 +513,9 @@ brw_try_draw_prims(struct gl_context *ctx,
>>  
>>        /* gl_DrawID always needs its own vertex buffer since it's not part of
>>         * the indirect parameter buffer. */
>
> The */ goes on its own line.
>
>> -      if (brw->vs.prog_data->uses_drawid) {
>> -         brw->draw.gl_drawid = prims[i].drawid;
>> -         drm_intel_bo_unreference(brw->draw.draw_id_bo);
>> -         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>> -      }
>> +      brw->draw.gl_drawid = prims[i].draw_id;
>> +      drm_intel_bo_unreference(brw->draw.draw_id_bo);
>> +      brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>
> The previous patch (incorrectly) added this block, and it seems like
> this should be conditional on uses_drawid.

Flagging BRW_NEW_VERTICES needs to be conditional to avoid execessive
vertex state emission and the last patch does that.  The problem is that
on the first iteration of the loop, we don't have brw->vs.prog_data,
since we haven't flushed state yet.

Fortunately, just setting these variables has no overhead: the
parameter upload happens later in the state flush
(brw_prepare_shader_draw_parameters below) where we do have VS prog_data
and can upload only if we need it.

I suppose that just doing

   if (i > 0 && brw->vs.prog_data->uses_drawid)
      brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

in this patch instead of setting it unconditionally here and then fixing
in a later patch is nicer.  I'll do that in v2.

Kristian

>>  
>>        if (brw->gen < 6)
>>  	 brw_set_prim(brw, &prims[i]);
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> index ccf963c..e601190 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> @@ -599,6 +599,12 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw)
>>  			&brw->draw.draw_params_bo,
>>                          &brw->draw.draw_params_offset);
>>     }
>> +
>> +   if (brw->vs.prog_data->uses_drawid) {
>> +      intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
>> +			&brw->draw.draw_id_bo,
>> +                        &brw->draw.draw_id_offset);
>> +   }
>>  }
>>  
>>  /**
>> @@ -663,6 +669,8 @@ brw_emit_vertices(struct brw_context *brw)
>>     if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
>>         brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance)
>>        ++nr_elements;
>> +   if (brw->vs.prog_data->uses_drawid)
>> +      nr_elements++;
>>  
>>     /* If the VS doesn't read any inputs (calculating vertex position from
>>      * a state variable for some reason, for example), emit a single pad
>> @@ -699,7 +707,8 @@ brw_emit_vertices(struct brw_context *brw)
>>     const bool uses_draw_params =
>>        brw->vs.prog_data->uses_basevertex ||
>>        brw->vs.prog_data->uses_baseinstance;
>> -   const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
>> +   const unsigned nr_buffers = brw->vb.nr_buffers +
>> +      uses_draw_params + brw->vs.prog_data->uses_drawid;
>>  
>>     if (nr_buffers) {
>>        if (brw->gen >= 6) {
>> @@ -726,6 +735,16 @@ brw_emit_vertices(struct brw_context *brw)
>>                                    0,  /* stride */
>>                                    0); /* step rate */
>>        }
>> +
>> +      if (brw->vs.prog_data->uses_drawid) {
>> +         EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
>> +                                  brw->draw.draw_id_bo,
>> +                                  brw->draw.draw_id_bo->size - 1,
>> +                                  brw->draw.draw_id_offset,
>> +                                  0,  /* stride */
>> +                                  0); /* step rate */
>> +      }
>> +
>>        ADVANCE_BATCH();
>>     }
>>  
>> @@ -839,6 +858,30 @@ brw_emit_vertices(struct brw_context *brw)
>>        OUT_BATCH(dw1);
>>     }
>>  
>> +   if (brw->vs.prog_data->uses_drawid) {
>> +      uint32_t dw0 = 0, dw1 = 0;
>> +
>> +      dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
>> +            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_1_SHIFT) |
>> +            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_2_SHIFT) |
>> +            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_3_SHIFT);
>> +
>> +      if (brw->gen >= 6) {
>> +         dw0 |= GEN6_VE0_VALID |
>> +                ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
>> +                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
>> +      } else {
>> +         dw0 |= BRW_VE0_VALID |
>> +                ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) |
>> +                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
>
> If we restrict this extension to core profile (i.e., only when
> ARB_draw_indirect is available), we can delete a bunch of this.  We only
> do ARB_draw_indirect on GEN7+.
>
>> +
>> +	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
>
> Mixed tabs and spaces.
>
>> +      }
>> +
>> +      OUT_BATCH(dw0);
>> +      OUT_BATCH(dw1);
>> +   }
>> +
>>     if (brw->gen >= 6 && gen6_edgeflag_input) {
>>        uint32_t format =
>>           brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 918bc0f..359e138 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -1674,6 +1674,8 @@ fs_visitor::assign_vs_urb_setup()
>>     if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
>>         vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
>>        count++;
>> +   if (vs_prog_data->uses_drawid)
>> +      count++;
>>  
>>     /* Each attribute is 4 regs. */
>>     this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 2e4937e..9e9c700 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -228,6 +228,13 @@ emit_system_values_block(nir_block *block, void *void_visitor)
>>              *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
>>           break;
>>  
>> +      case nir_intrinsic_load_draw_id:
>> +         assert(v->stage == MESA_SHADER_VERTEX);
>> +         reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID];
>> +         if (reg->file == BAD_FILE)
>> +            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID);
>> +         break;
>> +
>>        case nir_intrinsic_load_invocation_id:
>>           assert(v->stage == MESA_SHADER_GEOMETRY);
>>           reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
>> @@ -1739,7 +1746,8 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
>>     case nir_intrinsic_load_vertex_id_zero_base:
>>     case nir_intrinsic_load_base_vertex:
>>     case nir_intrinsic_load_instance_id:
>> -   case nir_intrinsic_load_base_instance: {
>> +   case nir_intrinsic_load_base_instance:
>> +   case nir_intrinsic_load_draw_id: {
>>        gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
>>        fs_reg val = nir_system_values[sv];
>>        assert(val.file != BAD_FILE);
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index ae98de1..21492ac 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -59,6 +59,16 @@ fs_visitor::emit_vs_system_value(int location)
>>        reg->reg_offset = 3;
>>        vs_prog_data->uses_instanceid = true;
>>        break;
>> +   case SYSTEM_VALUE_DRAW_ID:
>> +      if (nir->info.system_values_read &
>> +          (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>> +           BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>> +           BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>> +           BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID)))
>> +         reg->nr += 4;
>> +      reg->reg_offset = 0;
>> +      vs_prog_data->uses_drawid = true;
>> +      break;
>>     default:
>>        unreachable("not reached");
>>     }
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index 71c0f9f..34f37a3 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -1555,7 +1555,7 @@ int
>>  vec4_vs_visitor::setup_attributes(int payload_reg)
>>  {
>>     int nr_attributes;
>> -   int attribute_map[VERT_ATTRIB_MAX + 1];
>> +   int attribute_map[VERT_ATTRIB_MAX + 2];
>>     memset(attribute_map, 0, sizeof(attribute_map));
>>  
>>     nr_attributes = 0;
>> @@ -1566,6 +1566,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
>>        }
>>     }
>>  
>> +   if (vs_prog_data->uses_drawid) {
>> +      attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
>> +      nr_attributes++;
>> +   }
>> +
>>     /* VertexID is stored by the VF as the last vertex element, but we
>>      * don't represent it with a flag in inputs_read, so we call it
>>      * VERT_ATTRIB_MAX.
>> @@ -1573,6 +1578,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
>>     if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
>>         vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
>>        attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
>> +      nr_attributes++;
>>     }
>>  
>>     lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> index c6f07d5..e3901b8 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> @@ -85,6 +85,13 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
>>                                             glsl_type::int_type);
>>        break;
>>  
>> +   case nir_intrinsic_load_draw_id:
>> +      reg = &nir_system_values[SYSTEM_VALUE_DRAW_ID];
>> +      if (reg->file == BAD_FILE)
>> +         *reg = *make_reg_for_system_value(SYSTEM_VALUE_DRAW_ID,
>> +                                           glsl_type::int_type);
>> +      break;
>> +
>>     default:
>>        break;
>>     }
>> @@ -658,7 +665,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>     case nir_intrinsic_load_vertex_id_zero_base:
>>     case nir_intrinsic_load_base_vertex:
>>     case nir_intrinsic_load_instance_id:
>> -   case nir_intrinsic_load_base_instance: {
>> +   case nir_intrinsic_load_base_instance:
>> +   case nir_intrinsic_load_draw_id: {
>>        gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
>>        src_reg val = src_reg(nir_system_values[sv]);
>>        assert(val.file != BAD_FILE);
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
>> index bd6a9a4..1d69149 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
>> @@ -170,6 +170,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
>>        reg->writemask = WRITEMASK_W;
>>        vs_prog_data->uses_instanceid = true;
>>        break;
>> +   case SYSTEM_VALUE_DRAW_ID:
>> +      reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX + 1);
>> +      reg->writemask = WRITEMASK_X;
>> +      vs_prog_data->uses_drawid = true;
>> +      break;
>>     default:
>>        unreachable("not reached");
>>     }
>> diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
>> index 451cf0b..ff89e5f 100644
>> --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
>> +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
>> @@ -118,7 +118,8 @@ gen8_emit_vertices(struct brw_context *brw)
>>     const bool uses_draw_params =
>>        brw->vs.prog_data->uses_basevertex ||
>>        brw->vs.prog_data->uses_baseinstance;
>> -   const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
>> +   const unsigned nr_buffers = brw->vb.nr_buffers +
>> +      uses_draw_params + brw->vs.prog_data->uses_drawid;
>>  
>>     if (nr_buffers) {
>>        assert(nr_buffers <= 33);
>> @@ -147,6 +148,15 @@ gen8_emit_vertices(struct brw_context *brw)
>>                       brw->draw.draw_params_offset);
>>           OUT_BATCH(brw->draw.draw_params_bo->size);
>>        }
>> +
>> +      if (brw->vs.prog_data->uses_drawid) {
>> +         OUT_BATCH((brw->vb.nr_buffers + 1) << GEN6_VB0_INDEX_SHIFT |
>> +                   GEN7_VB0_ADDRESS_MODIFYENABLE |
>> +                   mocs_wb << 16);
>> +         OUT_RELOC64(brw->draw.draw_id_bo, I915_GEM_DOMAIN_VERTEX, 0,
>> +                     brw->draw.draw_id_offset);
>> +         OUT_BATCH(brw->draw.draw_id_bo->size);
>> +      }
>>        ADVANCE_BATCH();
>>     }
>>  
>> @@ -163,7 +173,8 @@ gen8_emit_vertices(struct brw_context *brw)
>>                                      ((brw->vs.prog_data->uses_instanceid ||
>>                                        brw->vs.prog_data->uses_vertexid) &&
>>                                       uses_edge_flag));
>> -   const unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
>> +   const unsigned nr_elements =
>> +      brw->vb.nr_enabled + needs_sgvs_element + brw->vs.prog_data->uses_drawid;
>>  
>>     /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
>>      * presumably for VertexID/InstanceID.
>> @@ -236,6 +247,16 @@ gen8_emit_vertices(struct brw_context *brw)
>>        }
>>     }
>>  
>> +   if (brw->vs.prog_data->uses_drawid) {
>> +      OUT_BATCH(GEN6_VE0_VALID |
>> +                ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
>> +                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT));
>> +      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
>> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
>> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
>> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
>> +   }
>> +
>>     if (gen6_edgeflag_input) {
>>        uint32_t format =
>>           brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
>> @@ -273,6 +294,15 @@ gen8_emit_vertices(struct brw_context *brw)
>>        OUT_BATCH(buffer->step_rate);
>>        ADVANCE_BATCH();
>>     }
>> +
>> +   if (brw->vs.prog_data->uses_drawid) {
>> +      const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
>> +      BEGIN_BATCH(3);
>> +      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
>> +      OUT_BATCH(element);
>> +      OUT_BATCH(0);
>> +      ADVANCE_BATCH();
>> +   }
>>  }
>>  
>>  const struct brw_tracked_state gen8_vertices = {
>> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
>> index 24761a7..c95bfbd 100644
>> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
>> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
>> @@ -203,6 +203,7 @@ intelInitExtensions(struct gl_context *ctx)
>>     ctx->Extensions.ARB_point_sprite = true;
>>     ctx->Extensions.ARB_seamless_cube_map = true;
>>     ctx->Extensions.ARB_shader_bit_encoding = true;
>> +   ctx->Extensions.ARB_shader_draw_parameters = true;
>
> Depending on what we decide about the ARB_draw_indirect dependency, this
> may need to move.
>
>>     ctx->Extensions.ARB_shader_texture_lod = true;
>>     ctx->Extensions.ARB_shadow = true;
>>     ctx->Extensions.ARB_sync = true;
>> 


More information about the mesa-dev mailing list