[Mesa-dev] [PATCH 3/6] i965: Enable hardware-generated binding tables on render path.

Abdiel Janulgue abdiel.janulgue at linux.intel.com
Fri Jun 26 04:14:34 PDT 2015



On 06/26/2015 11:19 AM, Ville Syrjälä wrote:
> On Fri, Jun 26, 2015 at 08:51:59AM +0300, Abdiel Janulgue wrote:
>> This patch implements the binding table enable command, which is also
>> used to allocate a binding table pool into which hardware-generated
>> binding table entries are flushed. Each binding table offset in
>> the binding table pool is unique to each shader stage that is
>> enabled within a batch.
>>
>> Also insert the required brw_tracked_state objects to enable
>> hw-generated binding tables in normal render path.
>>
>> v2: - Use MOCS in binding table pool alloc for GEN8
>>     - Fix spurious offset when allocating binding table pool entry
>>       and start from zero instead.
>> v3: - Include GEN8 fix for spurious offset above.
>>
>> Cc: kenneth at whitecape.org
>> Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
>> ---
>>  src/mesa/drivers/dri/i965/brw_binding_tables.c | 87 ++++++++++++++++++++++++++
>>  src/mesa/drivers/dri/i965/brw_context.c        |  4 ++
>>  src/mesa/drivers/dri/i965/brw_context.h        |  6 ++
>>  src/mesa/drivers/dri/i965/brw_state.h          |  6 ++
>>  src/mesa/drivers/dri/i965/brw_state_upload.c   |  4 ++
>>  src/mesa/drivers/dri/i965/gen7_disable.c       |  4 +-
>>  src/mesa/drivers/dri/i965/gen8_disable.c       |  4 +-
>>  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
>>  8 files changed, 115 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> index 98ff0dd..6bc540f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> @@ -170,6 +170,93 @@ const struct brw_tracked_state brw_gs_binding_table = {
>>     .emit = brw_gs_upload_binding_table,
>>  };
>>  
>> +/**
>> + * Hardware-generated binding tables for the resource streamer
>> + */
>> +void
>> +gen7_disable_hw_binding_tables(struct brw_context *brw)
>> +{
>> +   int pkt_len = brw->gen >= 8 ? 4 : 3;
>> +
>> +   BEGIN_BATCH(3);
>> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
>> +   OUT_BATCH(brw->is_haswell ? HSW_BT_POOL_ALLOC_MUST_BE_ONE : 0);
>> +   OUT_BATCH(0);
>> +   ADVANCE_BATCH();
> 
> Doesn't look quite right for gen8. Something like this perhaps?
> 
> BEGIN_BATCH(pkt_len);
> OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
> if (brw->gen >= 8) {
>     OUT_BATCH(0);
>     OUT_BATCH(0);
>     OUT_BATCH(0);
> } else {
>     OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
>     OUT_BATCH(0);
> }
> ADVANCE_BATCH();

Good catch! GEN8 hardware didn't complain about this. And neither did
the sim. I wonder why this worked without it. But I'll put it in. Thanks!


> 
>> +
>> +   /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
>> +    * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
>> +    *
>> +    * "When switching between HW and SW binding table generation, SW must
>> +    * issue a state cache invalidate."
>> +    */
>> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>> +}
>> +
>> +void
>> +gen7_enable_hw_binding_tables(struct brw_context *brw)
>> +{
>> +   if (!brw->use_resource_streamer)
>> +      return;
>> +
>> +   if (!brw->hw_bt_pool.bo) {
>> +      /* We use a single re-usable buffer object for the lifetime of the
>> +       * context and size it to maximum allowed binding tables that can be
>> +       * programmed per batch:
>> +       *
>> +       * From the Haswell PRM, Volume 7: 3D Media GPGPU,
>> +       * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
>> +       * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
>> +       */
>> +      static const int max_size = 16383 * 4;
>> +      brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
>> +                                              max_size, 64);
>> +      brw->hw_bt_pool.next_offset = 0;
>> +   }
>> +
>> +   int pkt_len = brw->gen >= 8 ? 4 : 3;
>> +   uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
>> +   if (brw->is_haswell)
>> +      dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
>> +         HSW_BT_POOL_ALLOC_MUST_BE_ONE;
>> +   else if (brw->gen >= 8)
>> +      dw1 |= BDW_MOCS_WB;
>> +
>> +   BEGIN_BATCH(3);
> 
> BEGIN_BATCH(pkt_len);

Yep!

> 
>> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
>> +   if (brw->gen >= 8) {
>> +      OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
>> +      OUT_BATCH(brw->hw_bt_pool.bo->size);
>> +   } else {
>> +      OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
>> +      OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> +             brw->hw_bt_pool.bo->size);
>> +   }
>> +   ADVANCE_BATCH();
>> +
>> +   /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
>> +    * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
>> +    *
>> +    * "When switching between HW and SW binding table generation, SW must
>> +    * issue a state cache invalidate."
>> +    */
>> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>> +}
>> +
>> +void
>> +gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
>> +{
>> +   brw->hw_bt_pool.next_offset = 0;
>> +}
>> +
>> +const struct brw_tracked_state gen7_hw_binding_tables = {
>> +   .dirty = {
>> +      .mesa = 0,
>> +      .brw = BRW_NEW_BATCH,
>> +   },
>> +   .emit = gen7_enable_hw_binding_tables
>> +};
>> +
>>  /** @} */
>>  
>>  /**
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
>> index 6b08216..f2bd2d3 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>> @@ -936,6 +936,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
>>     if (brw->wm.base.scratch_bo)
>>        drm_intel_bo_unreference(brw->wm.base.scratch_bo);
>>  
>> +   gen7_reset_hw_bt_pool_offsets(brw);
>> +   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
>> +   brw->hw_bt_pool.bo = NULL;
>> +
>>     drm_intel_gem_context_destroy(brw->hw_ctx);
>>  
>>     if (ctx->swrast_context) {
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>> index 10d8f1e..6d237bb 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -1394,6 +1394,12 @@ struct brw_context
>>        struct brw_cs_prog_data *prog_data;
>>     } cs;
>>  
>> +   /* RS hardware binding table */
>> +   struct {
>> +      drm_intel_bo *bo;
>> +      uint32_t next_offset;
>> +   } hw_bt_pool;
>> +
>>     struct {
>>        uint32_t state_offset;
>>        uint32_t blend_state_offset;
>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
>> index 987672f..f8ef98f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>> @@ -132,6 +132,7 @@ extern const struct brw_tracked_state gen7_sol_state;
>>  extern const struct brw_tracked_state gen7_urb;
>>  extern const struct brw_tracked_state gen7_vs_state;
>>  extern const struct brw_tracked_state gen7_wm_state;
>> +extern const struct brw_tracked_state gen7_hw_binding_tables;
>>  extern const struct brw_tracked_state haswell_cut_index;
>>  extern const struct brw_tracked_state gen8_blend_state;
>>  extern const struct brw_tracked_state gen8_disable_stages;
>> @@ -372,6 +373,11 @@ gen7_upload_constant_state(struct brw_context *brw,
>>                             const struct brw_stage_state *stage_state,
>>                             bool active, unsigned opcode);
>>  
>> +void gen7_rs_control(struct brw_context *brw, int enable);
>> +void gen7_enable_hw_binding_tables(struct brw_context *brw);
>> +void gen7_disable_hw_binding_tables(struct brw_context *brw);
>> +void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
>> +
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
>> index 7662c3b..6096b49 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
>> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
>> @@ -192,6 +192,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
>>     &gen6_color_calc_state,	/* must do before cc unit */
>>     &gen6_depth_stencil_state,	/* must do before cc unit */
>>  
>> +   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
>> +
>>     &gen6_vs_push_constants, /* Before vs_state */
>>     &gen6_gs_push_constants, /* Before gs_state */
>>     &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
>> @@ -268,6 +270,8 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
>>     &gen8_blend_state,
>>     &gen6_color_calc_state,
>>  
>> +   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
>> +
>>     &gen6_vs_push_constants, /* Before vs_state */
>>     &gen6_gs_push_constants, /* Before gs_state */
>>     &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
>> diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c
>> index 2c43cd7..bb50969 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_disable.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_disable.c
>> @@ -52,7 +52,7 @@ disable_stages(struct brw_context *brw)
>>  
>>     BEGIN_BATCH(2);
>>     OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
>> -   OUT_BATCH(0);
>> +   OUT_BATCH(brw->hw_bt_pool.next_offset);
>>     ADVANCE_BATCH();
>>  
>>     /* Disable the TE */
>> @@ -85,7 +85,7 @@ disable_stages(struct brw_context *brw)
>>  
>>     BEGIN_BATCH(2);
>>     OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
>> -   OUT_BATCH(0);
>> +   OUT_BATCH(brw->hw_bt_pool.next_offset);
>>     ADVANCE_BATCH();
>>  }
>>  
>> diff --git a/src/mesa/drivers/dri/i965/gen8_disable.c b/src/mesa/drivers/dri/i965/gen8_disable.c
>> index da0d4a5..32508e3 100644
>> --- a/src/mesa/drivers/dri/i965/gen8_disable.c
>> +++ b/src/mesa/drivers/dri/i965/gen8_disable.c
>> @@ -66,7 +66,7 @@ disable_stages(struct brw_context *brw)
>>  
>>     BEGIN_BATCH(2);
>>     OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
>> -   OUT_BATCH(0);
>> +   OUT_BATCH(brw->hw_bt_pool.next_offset);
>>     ADVANCE_BATCH();
>>  
>>     /* Disable the TE */
>> @@ -101,7 +101,7 @@ disable_stages(struct brw_context *brw)
>>  
>>     BEGIN_BATCH(2);
>>     OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
>> -   OUT_BATCH(0);
>> +   OUT_BATCH(brw->hw_bt_pool.next_offset);
>>     ADVANCE_BATCH();
>>  
>>     BEGIN_BATCH(2);
>> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> index 4e82003..97ee1e2 100644
>> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> @@ -32,6 +32,7 @@
>>  #include "intel_buffers.h"
>>  #include "intel_fbo.h"
>>  #include "brw_context.h"
>> +#include "brw_state.h"
>>  
>>  #include <xf86drm.h>
>>  #include <i915_drm.h>
>> @@ -379,6 +380,9 @@ _intel_batchbuffer_flush(struct brw_context *brw,
>>        drm_intel_bo_wait_rendering(brw->batch.bo);
>>     }
>>  
>> +   if (brw->use_resource_streamer)
>> +      gen7_reset_hw_bt_pool_offsets(brw);
>> +
>>     /* Start a new batch buffer. */
>>     brw_new_batch(brw);
>>  
>> -- 
>> 1.9.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


More information about the mesa-dev mailing list