[Mesa-dev] [PATCH 10/13] i965: Implement L3 state atom.
Francisco Jerez
currojerez at riseup.net
Sun Sep 6 09:12:48 PDT 2015
The L3 state atom calculates the target L3 partition weights whenever
the program bound to some shader stage is modified and, if they are
far enough from the current partitioning, makes sure that the L3
state is re-emitted.
---
src/mesa/drivers/dri/i965/brw_context.h | 6 ++++
src/mesa/drivers/dri/i965/brw_state.h | 1 +
src/mesa/drivers/dri/i965/gen7_l3_state.c | 60 +++++++++++++++++++++++++++++++
3 files changed, 67 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c00b132..36ce357 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1031,6 +1031,8 @@ enum brw_predicate_state {
struct shader_times;
+struct brw_l3_config;
+
/**
* brw_context is derived from gl_context.
*/
@@ -1568,6 +1570,10 @@ struct brw_context
int basevertex;
struct {
+ const struct brw_l3_config *config;
+ } l3;
+
+ struct {
drm_intel_bo *bo;
const char **names;
int *ids;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index e75b795..b7382c7 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -127,6 +127,7 @@ extern const struct brw_tracked_state gen7_depthbuffer;
extern const struct brw_tracked_state gen7_clip_state;
extern const struct brw_tracked_state gen7_disable_stages;
extern const struct brw_tracked_state gen7_gs_state;
+extern const struct brw_tracked_state gen7_l3_state;
extern const struct brw_tracked_state gen7_ps_state;
extern const struct brw_tracked_state gen7_push_constant_space;
extern const struct brw_tracked_state gen7_sbe_state;
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index 1a88261..58eb07b 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -418,3 +418,63 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
}
}
}
+
+/**
+ * Recompute brw->urb.size as cfg->n[L3P_URB] times the device's L3 way
+ * size, raising BRW_NEW_URB_SIZE only when the stored value changes.
+ */
+static void
+update_urb_size(struct brw_context *brw, const struct brw_l3_config *cfg)
+{
+ const unsigned sz = cfg->n[L3P_URB] *
+ get_l3_way_size(brw->intelScreen->devinfo);
+
+ if (brw->urb.size != sz) { /* Leave the dirty bit alone if unchanged. */
+ brw->urb.size = sz;
+ brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
+ }
+}
+
+static void
+emit_l3_state(struct brw_context *brw)
+{ /* Reprogram L3 if the current partitioning is too far from the target. */
+ const struct brw_l3_weights w = get_pipeline_state_l3_weights(brw);
+ const float dw = diff_l3_weights(w, get_config_l3_weights(brw->l3.config));
+ /* The distance between any two compatible weight vectors cannot exceed two
+ * due to the triangle inequality.
+ */
+ const float large_dw_threshold = 2.0;
+ /* Somewhat arbitrary, simply makes sure that there will be no repeated
+ * transitions to the same L3 configuration, could probably do better here.
+ */
+ const float small_dw_threshold = 0.5;
+ /* If we're emitting a new batch the caches should already be clean and the
+ * transition should be relatively cheap, so it shouldn't hurt much to use
+ * the smaller threshold. Otherwise use the larger threshold so that we
+ * only reprogram the L3 mid-batch if the most recently programmed
+ * configuration is incompatible with the current pipeline state.
+ */
+ const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ?
+ small_dw_threshold : large_dw_threshold);
+
+ if (dw > dw_threshold && brw->can_do_pipelined_register_writes) {
+ const struct brw_l3_config *const cfg =
+ get_l3_config(brw->intelScreen->devinfo, w);
+
+ setup_l3_config(brw, cfg);
+ update_urb_size(brw, cfg); /* May raise BRW_NEW_URB_SIZE. */
+ brw->l3.config = cfg; /* Baseline for the next dw comparison. */
+ }
+}
+
+const struct brw_tracked_state gen7_l3_state = { /* L3 partitioning atom. */
+ .dirty = {
+ .mesa = 0, /* No core Mesa dirty bits are listed. */
+ .brw = BRW_NEW_BATCH | /* Also selects the threshold in emit. */
+ BRW_NEW_VS_PROG_DATA | /* One prog-data bit per shader stage */
+ BRW_NEW_GS_PROG_DATA | /* whose program feeds the weights. */
+ BRW_NEW_FS_PROG_DATA |
+ BRW_NEW_CS_PROG_DATA,
+ },
+ .emit = emit_l3_state
+};
--
2.4.6
More information about the mesa-dev
mailing list