[Mesa-dev] [PATCH v3 13/44] i965: Implement L3 state atom.

Jordan Justen jordan.l.justen at intel.com
Tue Dec 1 00:19:31 PST 2015


From: Francisco Jerez <currojerez at riseup.net>

The L3 state atom calculates the target L3 partition weights when the
program bound to some shader stage is modified, and in case they are
far enough from the current partitioning it makes sure that the L3
state is re-emitted.

v3: Fix for inconsistent units the context URB size is expressed in.
    Clamp URB size to 1008 KB on SKL due to FF hardware limitation.
---
 src/mesa/drivers/dri/i965/brw_context.h   |  6 +++
 src/mesa/drivers/dri/i965/brw_state.h     |  1 +
 src/mesa/drivers/dri/i965/gen7_l3_state.c | 81 +++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0b91147..48024c6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -672,6 +672,8 @@ enum brw_predicate_state {
 
 struct shader_times;
 
+struct brw_l3_config;
+
 /**
  * brw_context is derived from gl_context.
  */
@@ -1214,6 +1216,10 @@ struct brw_context
    int basevertex;
 
    struct {
+      const struct brw_l3_config *config;
+   } l3;
+
+   struct {
       drm_intel_bo *bo;
       const char **names;
       int *ids;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 94734ba..49f301a 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -129,6 +129,7 @@ extern const struct brw_tracked_state gen7_depthbuffer;
 extern const struct brw_tracked_state gen7_clip_state;
 extern const struct brw_tracked_state gen7_disable_stages;
 extern const struct brw_tracked_state gen7_gs_state;
+extern const struct brw_tracked_state gen7_l3_state;
 extern const struct brw_tracked_state gen7_ps_state;
 extern const struct brw_tracked_state gen7_push_constant_space;
 extern const struct brw_tracked_state gen7_sbe_state;
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index 0b5e231..a895723 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -418,3 +418,84 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
       }
    }
 }
+
+/**
+ * Return the unit brw_context::urb::size is expressed in, in KB.  \sa
+ * brw_device_info::urb::size.
+ */
+static unsigned
+get_urb_size_scale(const struct brw_device_info *devinfo)
+{
+   return (devinfo->gen >= 8 ? devinfo->num_slices : 1);
+}
+
+/**
+ * Update the URB size in the context state for the specified L3
+ * configuration.
+ */
+static void
+update_urb_size(struct brw_context *brw, const struct brw_l3_config *cfg)
+{
+   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+   /* From the SKL "L3 Allocation and Programming" documentation:
+    *
+    * "URB is limited to 1008KB due to programming restrictions.  This is not
+    * a restriction of the L3 implementation, but of the FF and other clients.
+    * Therefore, in a GT4 implementation it is possible for the programmed
+    * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
+    * only 1008KB of this will be used."
+    */
+   const unsigned max = (devinfo->gen == 9 ? 1008 : ~0);
+   const unsigned sz =
+      MIN2(max, cfg->n[L3P_URB] * get_l3_way_size(devinfo)) /
+      get_urb_size_scale(devinfo);
+
+   if (brw->urb.size != sz) {
+      brw->urb.size = sz;
+      brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
+   }
+}
+
+static void
+emit_l3_state(struct brw_context *brw)
+{
+   const struct brw_l3_weights w = get_pipeline_state_l3_weights(brw);
+   const float dw = diff_l3_weights(w, get_config_l3_weights(brw->l3.config));
+   /* The distance between any two compatible weight vectors cannot exceed two
+    * due to the triangle inequality.
+    */
+   const float large_dw_threshold = 2.0;
+   /* Somewhat arbitrary, simply makes sure that there will be no repeated
+    * transitions to the same L3 configuration, could probably do better here.
+    */
+   const float small_dw_threshold = 0.5;
+   /* If we're emitting a new batch the caches should already be clean and the
+    * transition should be relatively cheap, so it shouldn't hurt much to use
+    * the smaller threshold.  Otherwise use the larger threshold so that we
+    * only reprogram the L3 mid-batch if the most recently programmed
+    * configuration is incompatible with the current pipeline state.
+    */
+   const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ?
+                               small_dw_threshold : large_dw_threshold);
+
+   if (dw > dw_threshold && brw->can_do_pipelined_register_writes) {
+      const struct brw_l3_config *const cfg =
+         get_l3_config(brw->intelScreen->devinfo, w);
+
+      setup_l3_config(brw, cfg);
+      update_urb_size(brw, cfg);
+      brw->l3.config = cfg;
+   }
+}
+
+const struct brw_tracked_state gen7_l3_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_CS_PROG_DATA |
+             BRW_NEW_FS_PROG_DATA |
+             BRW_NEW_GS_PROG_DATA |
+             BRW_NEW_VS_PROG_DATA,
+   },
+   .emit = emit_l3_state
+};
-- 
2.6.2



More information about the mesa-dev mailing list