[Mesa-dev] [v2 4/9] i965: Estimate batch space per shader stage
Jason Ekstrand
jason at jlekstrand.net
Tue Jan 31 18:38:08 UTC 2017
On Tue, Jan 31, 2017 at 8:15 AM, Topi Pohjolainen <
topi.pohjolainen at gmail.com> wrote:
> Current estimate doesn't consider space needed for surface states
> and it only calculates for one shader stage. Each stage can have
> its own sampler and surface state configuration.
>
> While this is only matter of runtime dynamics we don't seem to hit
> it currently. However, this becomes visible with blorp tex uploads
> (HSW with piglit test max-samplers). One runs out of space while
> batch wrapping isn't allowed.
>
> v2: Rebase on top of current upstream
>
> Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> CC: Kenneth Graunke <kenneth at whitecape.org>
> CC: Jason Ekstrand <jason at jlekstrand.net>
> ---
> src/mesa/drivers/dri/i965/brw_draw.c | 49 ++++++++++++++++++++++++++++++
> +++---
> 1 file changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c
> b/src/mesa/drivers/dri/i965/brw_draw.c
> index 0db7311..83a9f33 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -395,6 +395,51 @@ brw_postdraw_set_buffers_need_resolve(struct
> brw_context *brw)
> }
> }
>
> +static unsigned
> +brw_get_num_active_samplers(const struct gl_context *ctx,
> + const struct gl_program *prog)
> +{
> + const unsigned last = util_last_bit(prog->SamplersUsed);
> + unsigned count = 0;
> +
> + for (unsigned s = 0; s < last; s++) {
> + if (prog->SamplersUsed & (1 << s)) {
> + const unsigned unit = prog->SamplerUnits[s];
> + if (ctx->Texture.Unit[unit]._Current)
> + ++count;
> + }
> + }
> +
> + return count;
> +}
> +
> +static unsigned
> +brw_estimate_batch_space_for_textures(const struct brw_context *brw)
> +{
> + const struct gl_context *ctx = &brw->ctx;
> + unsigned total = 0;
> +
> + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
> + const struct gl_program *prog = ctx->_Shader->CurrentProgram[i];
> +
> + if (prog == NULL)
> + continue;
> +
> + const unsigned num_samplers = brw_get_num_active_samplers(ctx, prog);
> + const unsigned sampler_needs_per_tex_unit =
> + 16 /* sampler_state_size */ +
> + sizeof(struct gen5_sampler_default_color);
> + const unsigned surface_state_needs_per_tex_unit =
> + ALIGN(brw->isl_dev.ss.size, brw->isl_dev.ss.align) +
> + 4 /* binding table pointer */;
> + const unsigned total_per_tex_unit = sampler_needs_per_tex_unit +
> + surface_state_needs_per_tex_unit;
> + total += (num_samplers * total_per_tex_unit);
>
This isn't exactly correct. While it's true that a binding table entry
only consumes 4 bytes, the size of a binding table has to be rounded up to a
multiple of 64B. So if the number of samplers is not a multiple of 16, this
will underestimate the space needed.
> + }
> +
> + return total;
> +}
> +
> /* May fail if out of video memory for texture or vbo upload, or on
> * fallback conditions.
> */
> @@ -477,11 +522,9 @@ brw_try_draw_prims(struct gl_context *ctx,
>
> for (i = 0; i < nr_prims; i++) {
> int estimated_max_prim_size;
> - const int sampler_state_size = 16;
>
> estimated_max_prim_size = 512; /* batchbuffer commands */
> - estimated_max_prim_size += BRW_MAX_TEX_UNIT *
> - (sampler_state_size + sizeof(struct gen5_sampler_default_color));
> + estimated_max_prim_size += brw_estimate_batch_space_for_textures(brw);
> estimated_max_prim_size += 1024; /* gen6 VS push constants */
> estimated_max_prim_size += 1024; /* gen6 WM push constants */
> estimated_max_prim_size += 512; /* misc. pad */
> --
> 2.5.5
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170131/73770451/attachment-0001.html>
More information about the mesa-dev
mailing list