[Mesa-dev] [PATCH 3/6] i965: Enable hardware-generated binding tables on render path.

Abdiel Janulgue abdiel.janulgue at linux.intel.com
Wed May 20 08:46:22 PDT 2015


This patch implements the binding table enable command, which is also
used to allocate a binding table pool into which hardware-generated
binding table entries are flushed. Each binding table offset in
the binding table pool is unique for each shader stage that is
enabled within a batch.

Also insert the required brw_tracked_state objects to enable
hw-generated binding tables in normal render path.

v2: Clarify start of binding table pool offsets. (Topi)

Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c | 95 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_context.c        |  4 ++
 src/mesa/drivers/dri/i965/brw_context.h        |  6 ++
 src/mesa/drivers/dri/i965/brw_state.h          |  6 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c   |  2 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
 6 files changed, 117 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 98ff0dd..ab9b533 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -45,6 +45,23 @@
 #include "intel_batchbuffer.h"
 
 /**
+ * We are required to start at this offset for binding table pointer state when
+ * HW-generated binding tables are enabled, otherwise the GPU will hang. Note
+ * that the binding table offsets are now relative to the binding table pool
+ * base address instead of from the state batch.
+ *
+ * From the Bspec 3DSTATE_BINDING_TABLE_POINTERS_{PS/VS/GS/DS/HS} > Pointer to
+ * PS Binding Table section lists the format as:
+ *
+ *	"SurfaceStateOffset[16:6]BINDING_TABLE_STATE*256 When
+ *	HW-generated binding table is enabled"
+ *
+ * When HW-generated binding tables are enabled, Surface State Offsets are
+ * 16-bit entries.
+ */
+static const uint32_t hw_bt_start_offset = 256 * sizeof(uint16_t);
+
+/**
  * Upload a shader stage's binding table as indirect state.
  *
  * This copies brw_stage_state::surf_offset[] into the indirect state section
@@ -170,6 +187,84 @@ const struct brw_tracked_state brw_gs_binding_table = {
    .emit = brw_gs_upload_binding_table,
 };
 
+/**
+ * Hardware-generated binding tables for the resource streamer
+ */
+void
+gen7_disable_hw_binding_tables(struct brw_context *brw)
+{
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
+   OUT_BATCH(brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding
+    * Tables > Programming note:
+    *
+    * "When switching between HW and SW binding table generation, SW must
+    * issue a state cache invalidate."
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+}
+
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->has_resource_streamer) {
+      gen7_disable_hw_binding_tables(brw);
+      return;
+   }
+
+   if (!brw->hw_bt_pool.bo) {
+      /* We use a single re-usable buffer object for the lifetime of the
+       * context and size it to maximum allowed binding tables that can be
+       * programmed per batch:
+       *
+       * BSpec, 3D Pipeline > Resource Streamer > Hardware Binding Tables:
+       * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
+       */
+      static const int max_size = 16383 * 4;
+      brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+                                              max_size, 64);
+      brw->hw_bt_pool.next_offset = hw_bt_start_offset;
+   }
+
+   uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
+   if (brw->is_haswell)
+      dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_MOCS) |
+         HSW_HW_BINDING_TABLE_RESERVED;
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
+   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+             brw->hw_bt_pool.bo->size);
+   ADVANCE_BATCH();
+
+   /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding
+    * Tables > Programming note:
+    *
+    * "When switching between HW and SW binding table generation, SW must
+    * issue a state cache invalidate."
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+}
+
+void
+gen7_reset_rs_pool_offsets(struct brw_context *brw)
+{
+   brw->hw_bt_pool.next_offset = hw_bt_start_offset;
+}
+
+const struct brw_tracked_state gen7_hw_binding_tables = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+   },
+   .emit = gen7_enable_hw_binding_tables
+};
+
 /** @} */
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index ea56859..4a572d1 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -961,6 +961,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    if (brw->wm.base.scratch_bo)
       drm_intel_bo_unreference(brw->wm.base.scratch_bo);
 
+   gen7_reset_rs_pool_offsets(brw);
+   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
+   brw->hw_bt_pool.bo = NULL;
+
    drm_intel_gem_context_destroy(brw->hw_ctx);
 
    if (ctx->swrast_context) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 3f8e59d..94127b6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1404,6 +1404,12 @@ struct brw_context
       struct brw_cs_prog_data *prog_data;
    } cs;
 
+   /* RS hardware binding table */
+   struct {
+      drm_intel_bo *bo;
+      uint32_t next_offset;
+   } hw_bt_pool;
+
    struct {
       uint32_t state_offset;
       uint32_t blend_state_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 987672f..622ce3f 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -132,6 +132,7 @@ extern const struct brw_tracked_state gen7_sol_state;
 extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen7_vs_state;
 extern const struct brw_tracked_state gen7_wm_state;
+extern const struct brw_tracked_state gen7_hw_binding_tables;
 extern const struct brw_tracked_state haswell_cut_index;
 extern const struct brw_tracked_state gen8_blend_state;
 extern const struct brw_tracked_state gen8_disable_stages;
@@ -372,6 +373,11 @@ gen7_upload_constant_state(struct brw_context *brw,
                            const struct brw_stage_state *stage_state,
                            bool active, unsigned opcode);
 
+void gen7_rs_control(struct brw_context *brw, int enable);
+void gen7_enable_hw_binding_tables(struct brw_context *brw);
+void gen7_disable_hw_binding_tables(struct brw_context *brw);
+void gen7_reset_rs_pool_offsets(struct brw_context *brw);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 84b0861..a9d64bd 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -191,6 +191,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
    &gen6_color_calc_state,	/* must do before cc unit */
    &gen6_depth_stencil_state,	/* must do before cc unit */
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index a2a3a95..caeb31b 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -32,6 +32,7 @@
 #include "intel_buffers.h"
 #include "intel_fbo.h"
 #include "brw_context.h"
+#include "brw_state.h"
 
 #include <xf86drm.h>
 #include <i915_drm.h>
@@ -379,6 +380,9 @@ _intel_batchbuffer_flush(struct brw_context *brw,
       drm_intel_bo_wait_rendering(brw->batch.bo);
    }
 
+   if (brw->gen >= 7)
+      gen7_reset_rs_pool_offsets(brw);
+
    /* Start a new batch buffer. */
    brw_new_batch(brw);
 
-- 
1.9.1



More information about the mesa-dev mailing list