[Mesa-dev] [PATCH 3/5] i965: Enable hardware-generated binding tables on render path.

Abdiel Janulgue abdiel.janulgue at linux.intel.com
Wed May 20 00:44:21 PDT 2015



On 05/20/2015 10:21 AM, Pohjolainen, Topi wrote:
> On Wed, May 20, 2015 at 10:11:36AM +0300, Abdiel Janulgue wrote:
>>
>>
>> On 05/20/2015 09:54 AM, Pohjolainen, Topi wrote:
>>> On Wed, May 20, 2015 at 09:32:08AM +0300, Abdiel Janulgue wrote:
>>>> This patch implements the binding table enable command which is also
>>>> used to allocate a binding table pool into which hardware-generated
>>>> binding table entries are flushed. Each binding table offset in
>>>> the binding table pool is unique for each shader stage that is
>>>> enabled within a batch.
>>>>
>>>> Also insert the required brw_tracked_state objects to enable
>>>> hw-generated binding tables in normal render path.
>>>>
>>>> Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
>>>> ---
>>>>  src/mesa/drivers/dri/i965/brw_binding_tables.c | 97 ++++++++++++++++++++++++++
>>>>  src/mesa/drivers/dri/i965/brw_context.c        |  4 ++
>>>>  src/mesa/drivers/dri/i965/brw_context.h        |  6 ++
>>>>  src/mesa/drivers/dri/i965/brw_state.h          |  7 ++
>>>>  src/mesa/drivers/dri/i965/brw_state_upload.c   |  2 +
>>>>  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
>>>>  6 files changed, 120 insertions(+)
>>>>
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>>>> index 98ff0dd..d8cb96d 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
>>>> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>>>> @@ -45,6 +45,23 @@
>>>>  #include "intel_batchbuffer.h"
>>>>  
>>>>  /**
>>>> + * We are required to start at this offset for binding table pointer state when
>>>> + * HW-generated binding table is enabled otherwise the GPU will hang. Note that
>>>> + * the binding table offsets are now relative to the binding table pool base
>>>> + * address instead of from the state batch.
>>>> + *
>>>> + * From the Bspec 3DSTATE_BINDING_TABLE_POINTERS_{PS/VS/GS/DS/HS} > Pointer to
>>>> + * PS Binding Table section lists the format as:
>>>> + *
>>>> + *	"SurfaceStateOffset[16:6]BINDING_TABLE_STATE*256 When
>>>> + *	HW-generated binding table is enabled"
>>>> + *
>>>> + * When HW-generated binding tables are enabled, Surface State Offsets are
>>>> + * 16-bit entries.
>>>> + */
>>>> +#define HW_BT_START_OFFSET 2 * 256;
> 
> There is also an extra ; at the end.
> 
>>>
>>> Just checking that I'm reading this right, the multiplier two here is based
>>> on your experiments and it is not found in the spec?
>>
>> It's in the spec. 2 is "BINDING_TABLE_STATE" which is 16-bits.
> 
> Ah, okay, now I get the last two lines of the documentation. I would have
> probably written it the other way around then, 256 * 2 (num_elems * elem_size),
> or even as 256 * sizeof(uint16_t).

256 * sizeof(uint16_t) looks good! I'll do that in the next version.

Thanks,

> 
>>
>>>
>>>> +
>>>> +/**
>>>>   * Upload a shader stage's binding table as indirect state.
>>>>   *
>>>>   * This copies brw_stage_state::surf_offset[] into the indirect state section
>>>> @@ -170,6 +187,86 @@ const struct brw_tracked_state brw_gs_binding_table = {
>>>>     .emit = brw_gs_upload_binding_table,
>>>>  };
>>>>  
>>>> +/**
>>>> + * Hardware-generated binding tables for the resource streamer
>>>> + */
>>>> +void
>>>> +gen7_disable_hw_binding_tables(struct brw_context *brw)
>>>> +{
>>>> +   BEGIN_BATCH(3);
>>>> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
>>>> +   OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, BRW_HW_BINDING_TABLE_ENABLE) |
>>>> +             brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
>>>> +   OUT_BATCH(0);
>>>> +   ADVANCE_BATCH();
>>>> +
>>>> +   /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding
>>>> +    * Tables > Programming note
>>>> +
>>>> +    * "When switching between HW and SW binding table generation, SW must
>>>> +    * issue a state cache invalidate."
>>>> +    */
>>>> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>>>> +}
>>>> +
>>>> +void
>>>> +gen7_enable_hw_binding_tables(struct brw_context *brw)
>>>> +{
>>>> +   if (!brw->has_resource_streamer) {
>>>> +      gen7_disable_hw_binding_tables(brw);
>>>> +      return;
>>>> +   }
>>>> +
>>>> +   if (!brw->hw_bt_pool.bo) {
>>>> +      /* We use a single re-usable buffer object for the lifetime of the
>>>> +       * context and size it to maximum allowed binding tables that can be
>>>> +       * programmed per batch:
>>>> +       *
>>>> +       * BSpec, 3D Pipeline > Resource Streamer > Hardware Binding Tables:
>>>> +       * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
>>>> +       */
>>>> +      static const int max_size = 16383 * 4;
>>>> +      brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
>>>> +                                              max_size, 64);
>>>> +      brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
>>>> +   }
>>>> +
>>>> +   uint32_t dw1 = SET_FIELD(BRW_HW_BINDING_TABLE_ON,
>>>> +                            BRW_HW_BINDING_TABLE_ENABLE);
>>>> +   if (brw->is_haswell)
>>>> +      dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_MOCS) |
>>>> +         HSW_HW_BINDING_TABLE_RESERVED;
>>>> +
>>>> +   BEGIN_BATCH(3);
>>>> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
>>>> +   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
>>>> +   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>>>> +             brw->hw_bt_pool.bo->size);
>>>> +   ADVANCE_BATCH();
>>>> +
>>>> +   /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding
>>>> +    * Tables > Programming note
>>>> +
>>>> +    * "When switching between HW and SW binding table generation, SW must
>>>> +    * issue a state cache invalidate."
>>>> +    */
>>>> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>>>> +}
>>>> +
>>>> +void
>>>> +gen7_reset_rs_pool_offsets(struct brw_context *brw)
>>>> +{
>>>> +   brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
>>>> +}
>>>> +
>>>> +const struct brw_tracked_state gen7_hw_binding_tables = {
>>>> +   .dirty = {
>>>> +      .mesa = 0,
>>>> +      .brw = BRW_NEW_BATCH,
>>>> +   },
>>>> +   .emit = gen7_enable_hw_binding_tables
>>>> +};
>>>> +
>>>>  /** @} */
>>>>  
>>>>  /**
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
>>>> index ea56859..4a572d1 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>>>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>>>> @@ -961,6 +961,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
>>>>     if (brw->wm.base.scratch_bo)
>>>>        drm_intel_bo_unreference(brw->wm.base.scratch_bo);
>>>>  
>>>> +   gen7_reset_rs_pool_offsets(brw);
>>>> +   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
>>>> +   brw->hw_bt_pool.bo = NULL;
>>>> +
>>>>     drm_intel_gem_context_destroy(brw->hw_ctx);
>>>>  
>>>>     if (ctx->swrast_context) {
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>>>> index 3f8e59d..94127b6 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>>>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>>>> @@ -1404,6 +1404,12 @@ struct brw_context
>>>>        struct brw_cs_prog_data *prog_data;
>>>>     } cs;
>>>>  
>>>> +   /* RS hardware binding table */
>>>> +   struct {
>>>> +      drm_intel_bo *bo;
>>>> +      uint32_t next_offset;
>>>> +   } hw_bt_pool;
>>>> +
>>>>     struct {
>>>>        uint32_t state_offset;
>>>>        uint32_t blend_state_offset;
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
>>>> index 987672f..52dea58 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>>>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>>>> @@ -132,6 +132,7 @@ extern const struct brw_tracked_state gen7_sol_state;
>>>>  extern const struct brw_tracked_state gen7_urb;
>>>>  extern const struct brw_tracked_state gen7_vs_state;
>>>>  extern const struct brw_tracked_state gen7_wm_state;
>>>> +extern const struct brw_tracked_state gen7_hw_binding_tables;
>>>>  extern const struct brw_tracked_state haswell_cut_index;
>>>>  extern const struct brw_tracked_state gen8_blend_state;
>>>>  extern const struct brw_tracked_state gen8_disable_stages;
>>>> @@ -372,6 +373,12 @@ gen7_upload_constant_state(struct brw_context *brw,
>>>>                             const struct brw_stage_state *stage_state,
>>>>                             bool active, unsigned opcode);
>>>>  
>>>> +/* gen7_misc_state.c */
>>>> +void gen7_rs_control(struct brw_context *brw, int enable);
>>>> +void gen7_enable_hw_binding_tables(struct brw_context *brw);
>>>> +void gen7_disable_hw_binding_tables(struct brw_context *brw);
>>>> +void gen7_reset_rs_pool_offsets(struct brw_context *brw);
>>>> +
>>>>  #ifdef __cplusplus
>>>>  }
>>>>  #endif
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
>>>> index 84b0861..a9d64bd 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
>>>> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
>>>> @@ -191,6 +191,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
>>>>     &gen6_color_calc_state,	/* must do before cc unit */
>>>>     &gen6_depth_stencil_state,	/* must do before cc unit */
>>>>  
>>>> +   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
>>>> +
>>>>     &gen6_vs_push_constants, /* Before vs_state */
>>>>     &gen6_gs_push_constants, /* Before gs_state */
>>>>     &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
>>>> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>>>> index a2a3a95..caeb31b 100644
>>>> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>>>> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>>>> @@ -32,6 +32,7 @@
>>>>  #include "intel_buffers.h"
>>>>  #include "intel_fbo.h"
>>>>  #include "brw_context.h"
>>>> +#include "brw_state.h"
>>>>  
>>>>  #include <xf86drm.h>
>>>>  #include <i915_drm.h>
>>>> @@ -379,6 +380,9 @@ _intel_batchbuffer_flush(struct brw_context *brw,
>>>>        drm_intel_bo_wait_rendering(brw->batch.bo);
>>>>     }
>>>>  
>>>> +   if (brw->gen >= 7)
>>>> +      gen7_reset_rs_pool_offsets(brw);
>>>> +
>>>>     /* Start a new batch buffer. */
>>>>     brw_new_batch(brw);
>>>>  
>>>> -- 
>>>> 1.9.1
>>>>
>>>> _______________________________________________
>>>> mesa-dev mailing list
>>>> mesa-dev at lists.freedesktop.org
>>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
> 


More information about the mesa-dev mailing list