[Mesa-dev] [PATCH v3 13/44] i965: Implement L3 state atom.
Kenneth Graunke
kenneth at whitecape.org
Thu Dec 3 12:06:25 PST 2015
On Tuesday, December 01, 2015 12:19:31 AM Jordan Justen wrote:
> From: Francisco Jerez <currojerez at riseup.net>
>
> The L3 state atom calculates the target L3 partition weights when the
> program bound to some shader stage is modified, and in case they are
> far enough from the current partitioning it makes sure that the L3
> state is re-emitted.
>
> v3: Fix for inconsistent units the context URB size is expressed in.
> Clamp URB size to 1008 KB on SKL due to FF hardware limitation.
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 6 +++
> src/mesa/drivers/dri/i965/brw_state.h | 1 +
> src/mesa/drivers/dri/i965/gen7_l3_state.c | 81 +++++++++++++++++++++++++++++++
> 3 files changed, 88 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 0b91147..48024c6 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -672,6 +672,8 @@ enum brw_predicate_state {
>
> struct shader_times;
>
> +struct brw_l3_config;
> +
> /**
> * brw_context is derived from gl_context.
> */
> @@ -1214,6 +1216,10 @@ struct brw_context
> int basevertex;
>
> struct {
> + const struct brw_l3_config *config;
> + } l3;
> +
> + struct {
> drm_intel_bo *bo;
> const char **names;
> int *ids;
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index 94734ba..49f301a 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -129,6 +129,7 @@ extern const struct brw_tracked_state gen7_depthbuffer;
> extern const struct brw_tracked_state gen7_clip_state;
> extern const struct brw_tracked_state gen7_disable_stages;
> extern const struct brw_tracked_state gen7_gs_state;
> +extern const struct brw_tracked_state gen7_l3_state;
> extern const struct brw_tracked_state gen7_ps_state;
> extern const struct brw_tracked_state gen7_push_constant_space;
> extern const struct brw_tracked_state gen7_sbe_state;
> diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
> index 0b5e231..a895723 100644
> --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
> @@ -418,3 +418,84 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
> }
> }
> }
> +
> +/**
> + * Return the unit brw_context::urb::size is expressed in, in KB. \sa
> + * brw_device_info::urb::size.
> + */
> +static unsigned
> +get_urb_size_scale(const struct brw_device_info *devinfo)
> +{
> + return (devinfo->gen >= 8 ? devinfo->num_slices : 1);
> +}
> +
> +/**
> + * Update the URB size in the context state for the specified L3
> + * configuration.
> + */
> +static void
> +update_urb_size(struct brw_context *brw, const struct brw_l3_config *cfg)
> +{
> + const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
> + /* From the SKL "L3 Allocation and Programming" documentation:
> + *
> + * "URB is limited to 1008KB due to programming restrictions. This is not
> + * a restriction of the L3 implementation, but of the FF and other clients.
> + * Therefore, in a GT4 implementation it is possible for the programmed
> + * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
> + * only 1008KB of this will be used."
> + */
> + const unsigned max = (devinfo->gen == 9 ? 1008 : ~0);
I think this would be clearer as devinfo->gt == 4, since it applies to
all currently known GT4 parts, and doesn't apply to GT1-3. Presumably
GT1-3 just don't have enough URB to hit that limit, so it's probably
moot...*shrug*.
Acked-by: Kenneth Graunke <kenneth at whitecape.org>
> + const unsigned sz =
> + MIN2(max, cfg->n[L3P_URB] * get_l3_way_size(devinfo)) /
> + get_urb_size_scale(devinfo);
> +
> + if (brw->urb.size != sz) {
> + brw->urb.size = sz;
> + brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
> + }
> +}
> +
> +static void
> +emit_l3_state(struct brw_context *brw)
> +{
> + const struct brw_l3_weights w = get_pipeline_state_l3_weights(brw);
> + const float dw = diff_l3_weights(w, get_config_l3_weights(brw->l3.config));
> + /* The distance between any two compatible weight vectors cannot exceed two
> + * due to the triangle inequality.
> + */
> + const float large_dw_threshold = 2.0;
> + /* Somewhat arbitrary, simply makes sure that there will be no repeated
> + * transitions to the same L3 configuration, could probably do better here.
> + */
> + const float small_dw_threshold = 0.5;
> + /* If we're emitting a new batch the caches should already be clean and the
> + * transition should be relatively cheap, so it shouldn't hurt much to use
> + * the smaller threshold. Otherwise use the larger threshold so that we
> + * only reprogram the L3 mid-batch if the most recently programmed
> + * configuration is incompatible with the current pipeline state.
> + */
This seems like a good idea.
> + const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ?
> + small_dw_threshold : large_dw_threshold);
> +
> + if (dw > dw_threshold && brw->can_do_pipelined_register_writes) {
> + const struct brw_l3_config *const cfg =
> + get_l3_config(brw->intelScreen->devinfo, w);
> +
> + setup_l3_config(brw, cfg);
> + update_urb_size(brw, cfg);
> + brw->l3.config = cfg;
> + }
> +}
> +
> +const struct brw_tracked_state gen7_l3_state = {
> + .dirty = {
> + .mesa = 0,
> + .brw = BRW_NEW_BATCH |
> + BRW_NEW_CS_PROG_DATA |
> + BRW_NEW_FS_PROG_DATA |
> + BRW_NEW_GS_PROG_DATA |
> + BRW_NEW_VS_PROG_DATA,
> + },
> + .emit = emit_l3_state
> +};
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20151203/d1adaa2e/attachment-0001.sig>
More information about the mesa-dev
mailing list