[Mesa-dev] [PATCH 03/27] i965: Enable hardware-generated binding tables on render path.
Abdiel Janulgue
abdiel.janulgue at linux.intel.com
Fri May 8 05:18:52 PDT 2015
On 05/07/2015 04:43 PM, Pohjolainen, Topi wrote:
> On Tue, Apr 28, 2015 at 11:08:00PM +0300, Abdiel Janulgue wrote:
>> This patch implements the binding table enable command which is also
>> used to allocate a binding table pool where hardware-generated
>> binding table entries are flushed into. Each binding table offset in
>> the binding table pool is unique per each shader stage that are
>> enabled within a batch.
>>
>> Also insert the required brw_tracked_state objects to enable
>> hw-generated binding tables in normal render path.
>>
>> Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
>> ---
>> src/mesa/drivers/dri/i965/brw_binding_tables.c | 70 ++++++++++++++++++++++++++
>> src/mesa/drivers/dri/i965/brw_context.c | 4 ++
>> src/mesa/drivers/dri/i965/brw_context.h | 5 ++
>> src/mesa/drivers/dri/i965/brw_state.h | 7 +++
>> src/mesa/drivers/dri/i965/brw_state_upload.c | 2 +
>> src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++
>> 6 files changed, 92 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> index 459165a..a58e32e 100644
>> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> @@ -44,6 +44,11 @@
>> #include "brw_state.h"
>> #include "intel_batchbuffer.h"
>>
>> +/* Somehow the hw-binding table pool offset must start here, otherwise
>> + * the GPU will hang
>> + */
>> +#define HW_BT_START_OFFSET 256;
>
> I think we want to understand this a little better before enabling...
>
>> +
>> /**
>> * Upload a shader stage's binding table as indirect state.
>> *
>> @@ -163,6 +168,71 @@ const struct brw_tracked_state brw_gs_binding_table = {
>> .emit = brw_gs_upload_binding_table,
>> };
>>
>> +/**
>> + * Hardware-generated binding tables for the resource streamer
>> + */
>> +void
>> +gen7_disable_hw_binding_tables(struct brw_context *brw)
>> +{
>> + BEGIN_BATCH(3);
>> + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
>> + OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, BRW_HW_BINDING_TABLE_ENABLE) |
>> + brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
>> + OUT_BATCH(0);
>> + ADVANCE_BATCH();
>> +
>> + /* Pipe control workaround */
>> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>> +}
>> +
>> +void
>> +gen7_enable_hw_binding_tables(struct brw_context *brw)
>> +{
>> + if (!brw->has_resource_streamer) {
>> + gen7_disable_hw_binding_tables(brw);
>
> I started wondering why we really need this - RS is disabled by default and
> we haven't needed to do anything to disable it before.
>
>> + return;
>> + }
>> +
>> + if (!brw->hw_bt_pool.bo) {
>> + /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding Tables:
>> + *
>> + * "A maximum of 16,383 Binding tables are allowed in any batch buffer."
>> + */
>> + int max_size = 16383 * 4;
>
> But does it really need this much all the time? I guess I need to go and
> read the spec.
This is actually just one re-usable buffer object sticking around for
the lifetime of the context. Compare this with creating lots of bo
every-time we enable the resource streamer. I think it helps with
reducing the amount of relocations we have.
>
>> + brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
>> + max_size, 64);
>> + brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
>> + }
>> +
>> + uint32_t dw1 = SET_FIELD(BRW_HW_BINDING_TABLE_ON, BRW_HW_BINDING_TABLE_ENABLE);
>> + if (brw->is_haswell)
>> + dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_MOCS) | HSW_HW_BINDING_TABLE_RESERVED;
>
> These are overflowing 80 columns.
>
>> +
>> + BEGIN_BATCH(3);
>> + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
>> + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
>> + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> + brw->hw_bt_pool.bo->size);
>> + ADVANCE_BATCH();
>> +
>> + /* Pipe control workaround */
>> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>
> Would you have a spec reference for this?
3D-Media-GPGPU Engine > Resource Streamer [HSW+] > Hardware Binding
Tables [HSW+] > Programming note
"When switching between HW and SW binding table generation, SW must
issue a state cache invalidate."
>
>> +}
>> +
>> +void
>> +gen7_reset_rs_pool_offsets(struct brw_context *brw)
>> +{
>> + brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
>> +}
>> +
>> +const struct brw_tracked_state gen7_hw_binding_tables = {
>> + .dirty = {
>> + .mesa = 0,
>> + .brw = BRW_NEW_BATCH,
>> + },
>> + .emit = gen7_enable_hw_binding_tables
>> +};
>> +
>> /** @} */
>>
>> /**
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
>> index c7e1e81..9c7ccae 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>> @@ -953,6 +953,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
>> if (brw->wm.base.scratch_bo)
>> drm_intel_bo_unreference(brw->wm.base.scratch_bo);
>>
>> + gen7_reset_rs_pool_offsets(brw);
>> + drm_intel_bo_unreference(brw->hw_bt_pool.bo);
>> + brw->hw_bt_pool.bo = NULL;
>> +
>> drm_intel_gem_context_destroy(brw->hw_ctx);
>>
>> if (ctx->swrast_context) {
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>> index 07626af..1c72b74 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -1360,6 +1360,11 @@ struct brw_context
>> uint32_t fast_clear_op;
>> } wm;
>>
>> + /* RS hardware binding table */
>> + struct {
>> + drm_intel_bo *bo;
>> + uint32_t next_offset;
>> + } hw_bt_pool;
>>
>> struct {
>> uint32_t state_offset;
>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
>> index cfa67b6..d882bdd 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>> @@ -130,6 +130,7 @@ extern const struct brw_tracked_state gen7_sol_state;
>> extern const struct brw_tracked_state gen7_urb;
>> extern const struct brw_tracked_state gen7_vs_state;
>> extern const struct brw_tracked_state gen7_wm_state;
>> +extern const struct brw_tracked_state gen7_hw_binding_tables;
>> extern const struct brw_tracked_state haswell_cut_index;
>> extern const struct brw_tracked_state gen8_blend_state;
>> extern const struct brw_tracked_state gen8_disable_stages;
>> @@ -297,6 +298,12 @@ gen7_upload_constant_state(struct brw_context *brw,
>> const struct brw_stage_state *stage_state,
>> bool active, unsigned opcode);
>>
>> +/* gen7_misc_state.c */
>> +void gen7_rs_control(struct brw_context *brw, int enable);
>> +void gen7_enable_hw_binding_tables(struct brw_context *brw);
>> +void gen7_disable_hw_binding_tables(struct brw_context *brw);
>> +void gen7_reset_rs_pool_offsets(struct brw_context *brw);
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
>> index ab316bf..e9f3bbd 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
>> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
>> @@ -190,6 +190,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
>> &gen6_color_calc_state, /* must do before cc unit */
>> &gen6_depth_stencil_state, /* must do before cc unit */
>>
>> + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
>> +
>> &gen6_vs_push_constants, /* Before vs_state */
>> &gen6_gs_push_constants, /* Before gs_state */
>> &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
>> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> index bce5830..509e898 100644
>> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> @@ -32,6 +32,7 @@
>> #include "intel_buffers.h"
>> #include "intel_fbo.h"
>> #include "brw_context.h"
>> +#include "brw_state.h"
>>
>> #include <xf86drm.h>
>> #include <i915_drm.h>
>> @@ -374,6 +375,9 @@ _intel_batchbuffer_flush(struct brw_context *brw,
>> drm_intel_bo_wait_rendering(brw->batch.bo);
>> }
>>
>> + if (brw->gen >= 7)
>> + gen7_reset_rs_pool_offsets(brw);
>> +
>> /* Start a new batch buffer. */
>> brw_new_batch(brw);
>>
>> --
>> 1.9.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
More information about the mesa-dev
mailing list