[Mesa-dev] [PATCH v5 3/6] i965: Enable hardware-generated binding tables on render path.
Kenneth Graunke
kenneth at whitecape.org
Thu Jul 16 19:26:57 PDT 2015
On Tuesday, July 07, 2015 11:50:21 AM Abdiel Janulgue wrote:
> This patch implements the binding table enable command which is also
> used to allocate a binding table pool where hardware-generated
> binding table entries are flushed into. Each binding table offset in
> the binding table pool is unique per each shader stage that are
> enabled within a batch.
>
> Also insert the required brw_tracked_state objects to enable
> hw-generated binding tables in normal render path.
>
> v2: - Use MOCS in binding table pool alloc for GEN8
> - Fix spurious offset when allocating binding table pool entry
> and start from zero instead.
> v3: - Include GEN8 fix for spurious offset above.
> v4: - Fixup wrong packet length in enable/disable hw-binding table
> for GEN8 (Ville).
> - Don't invoke HW-binding table disable command when we don't
> have resource streamer (Chris).
> v5: - Reorder the state cache invalidate flush so it happens in-between
> enabling hw-generated binding tables and the previous sw-binding
> table GPU state (Chris).
>
> Cc: kenneth at whitecape.org
> Cc: syrjala at sci.fi
> Cc: chris at chris-wilson.co.uk
> Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
> ---
> src/mesa/drivers/dri/i965/brw_binding_tables.c | 96 ++++++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_context.c | 4 ++
> src/mesa/drivers/dri/i965/brw_context.h | 6 ++
> src/mesa/drivers/dri/i965/brw_state.h | 6 ++
> src/mesa/drivers/dri/i965/brw_state_upload.c | 4 ++
> src/mesa/drivers/dri/i965/gen7_disable.c | 4 +-
> src/mesa/drivers/dri/i965/gen8_disable.c | 4 +-
> src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++
> 8 files changed, 124 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> index 98ff0dd..2f32976 100644
> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> @@ -170,6 +170,102 @@ const struct brw_tracked_state brw_gs_binding_table = {
> .emit = brw_gs_upload_binding_table,
> };
>
> +/**
> + * Hardware-generated binding tables for the resource streamer
> + */
This comment still isn't sensible. Perhaps:
/**
* Disable hardware binding table support, falling back to the
* older software-generated binding table mechanism.
*/
> +void
> +gen7_disable_hw_binding_tables(struct brw_context *brw)
> +{
> + if (!brw->use_resource_streamer)
> + return;
> +
> + int pkt_len = brw->gen >= 8 ? 4 : 3;
> +
> + BEGIN_BATCH(pkt_len);
> + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
> + if (brw->gen >= 8) {
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> + } else {
> + OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
> + OUT_BATCH(0);
> + }
> + ADVANCE_BATCH();
> +
> + /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
> + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
> + *
> + * "When switching between HW and SW binding table generation, SW must
> + * issue a state cache invalidate."
> + */
> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
In the enabling case, Chris mentioned that the flush should go first - does this
need to be changed, too? Or is this right, and we should disable in the
opposite order? Chris, thoughts?
> +}
> +
/**
* Enable hardware binding tables and set up the binding table pool.
*/
> +void
> +gen7_enable_hw_binding_tables(struct brw_context *brw)
> +{
> + if (!brw->use_resource_streamer)
> + return;
> +
> + if (!brw->hw_bt_pool.bo) {
> + /* We use a single re-usable buffer object for the lifetime of the
> + * context and size it to maximum allowed binding tables that can be
> + * programmed per batch:
> + *
> + * From the Haswell PRM, Volume 7: 3D Media GPGPU,
> + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
> + * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
> + */
> + static const int max_size = 16383 * 4;
> + brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
> + max_size, 64);
> + brw->hw_bt_pool.next_offset = 0;
> + }
> +
> + /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
> + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
> + *
> + * "When switching between HW and SW binding table generation, SW must
> + * issue a state cache invalidate."
> + */
> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> +
> + int pkt_len = brw->gen >= 8 ? 4 : 3;
> + uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
> + if (brw->is_haswell)
> + dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
> + HSW_BT_POOL_ALLOC_MUST_BE_ONE;
> + else if (brw->gen >= 8)
> + dw1 |= BDW_MOCS_WB;
Multi-line statements should have braces, and the indentation is a bit
off:
if (brw->is_haswell) {
dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
HSW_BT_POOL_ALLOC_MUST_BE_ONE;
} else if (brw->gen >= 8) {
dw1 |= BDW_MOCS_WB;
}
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
> +
> + BEGIN_BATCH(pkt_len);
> + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
> + if (brw->gen >= 8) {
> + OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
> + OUT_BATCH(brw->hw_bt_pool.bo->size);
> + } else {
> + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
> + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> + brw->hw_bt_pool.bo->size);
> + }
> + ADVANCE_BATCH();
> +}
> +
> +void
> +gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
> +{
> + brw->hw_bt_pool.next_offset = 0;
> +}
> +
> +const struct brw_tracked_state gen7_hw_binding_tables = {
> + .dirty = {
> + .mesa = 0,
> + .brw = BRW_NEW_BATCH,
> + },
> + .emit = gen7_enable_hw_binding_tables
> +};
> +
> /** @} */
>
> /**
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index ec22497..1952ec8 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -935,6 +935,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
> if (brw->wm.base.scratch_bo)
> drm_intel_bo_unreference(brw->wm.base.scratch_bo);
>
> + gen7_reset_hw_bt_pool_offsets(brw);
> + drm_intel_bo_unreference(brw->hw_bt_pool.bo);
> + brw->hw_bt_pool.bo = NULL;
> +
> drm_intel_gem_context_destroy(brw->hw_ctx);
>
> if (ctx->swrast_context) {
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 10d8f1e..6d237bb 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1394,6 +1394,12 @@ struct brw_context
> struct brw_cs_prog_data *prog_data;
> } cs;
>
> + /* RS hardware binding table */
> + struct {
> + drm_intel_bo *bo;
> + uint32_t next_offset;
> + } hw_bt_pool;
> +
> struct {
> uint32_t state_offset;
> uint32_t blend_state_offset;
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index 987672f..f8ef98f 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -132,6 +132,7 @@ extern const struct brw_tracked_state gen7_sol_state;
> extern const struct brw_tracked_state gen7_urb;
> extern const struct brw_tracked_state gen7_vs_state;
> extern const struct brw_tracked_state gen7_wm_state;
> +extern const struct brw_tracked_state gen7_hw_binding_tables;
> extern const struct brw_tracked_state haswell_cut_index;
> extern const struct brw_tracked_state gen8_blend_state;
> extern const struct brw_tracked_state gen8_disable_stages;
> @@ -372,6 +373,11 @@ gen7_upload_constant_state(struct brw_context *brw,
> const struct brw_stage_state *stage_state,
> bool active, unsigned opcode);
>
> +void gen7_rs_control(struct brw_context *brw, int enable);
> +void gen7_enable_hw_binding_tables(struct brw_context *brw);
> +void gen7_disable_hw_binding_tables(struct brw_context *brw);
> +void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index 7662c3b..6096b49 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -192,6 +192,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
> &gen6_color_calc_state, /* must do before cc unit */
> &gen6_depth_stencil_state, /* must do before cc unit */
>
> + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
> +
> &gen6_vs_push_constants, /* Before vs_state */
> &gen6_gs_push_constants, /* Before gs_state */
> &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
> @@ -268,6 +270,8 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
> &gen8_blend_state,
> &gen6_color_calc_state,
>
> + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
> +
> &gen6_vs_push_constants, /* Before vs_state */
> &gen6_gs_push_constants, /* Before gs_state */
> &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
> diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c
> index 2c43cd7..bb50969 100644
> --- a/src/mesa/drivers/dri/i965/gen7_disable.c
> +++ b/src/mesa/drivers/dri/i965/gen7_disable.c
> @@ -52,7 +52,7 @@ disable_stages(struct brw_context *brw)
>
> BEGIN_BATCH(2);
> OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
> - OUT_BATCH(0);
> + OUT_BATCH(brw->hw_bt_pool.next_offset);
> ADVANCE_BATCH();
>
> /* Disable the TE */
> @@ -85,7 +85,7 @@ disable_stages(struct brw_context *brw)
>
> BEGIN_BATCH(2);
> OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
> - OUT_BATCH(0);
> + OUT_BATCH(brw->hw_bt_pool.next_offset);
> ADVANCE_BATCH();
> }
>
> diff --git a/src/mesa/drivers/dri/i965/gen8_disable.c b/src/mesa/drivers/dri/i965/gen8_disable.c
> index da0d4a5..32508e3 100644
> --- a/src/mesa/drivers/dri/i965/gen8_disable.c
> +++ b/src/mesa/drivers/dri/i965/gen8_disable.c
> @@ -66,7 +66,7 @@ disable_stages(struct brw_context *brw)
>
> BEGIN_BATCH(2);
> OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
> - OUT_BATCH(0);
> + OUT_BATCH(brw->hw_bt_pool.next_offset);
> ADVANCE_BATCH();
>
> /* Disable the TE */
> @@ -101,7 +101,7 @@ disable_stages(struct brw_context *brw)
>
> BEGIN_BATCH(2);
> OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
> - OUT_BATCH(0);
> + OUT_BATCH(brw->hw_bt_pool.next_offset);
> ADVANCE_BATCH();
>
> BEGIN_BATCH(2);
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 4e82003..97ee1e2 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -32,6 +32,7 @@
> #include "intel_buffers.h"
> #include "intel_fbo.h"
> #include "brw_context.h"
> +#include "brw_state.h"
>
> #include <xf86drm.h>
> #include <i915_drm.h>
> @@ -379,6 +380,9 @@ _intel_batchbuffer_flush(struct brw_context *brw,
> drm_intel_bo_wait_rendering(brw->batch.bo);
> }
>
> + if (brw->use_resource_streamer)
> + gen7_reset_hw_bt_pool_offsets(brw);
> +
> /* Start a new batch buffer. */
> brw_new_batch(brw);
>
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150716/5f5cc5dc/attachment.sig>
More information about the mesa-dev
mailing list