<div dir="ltr"><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jan 31, 2017 at 8:15 AM, Topi Pohjolainen <span dir="ltr"><<a href="mailto:topi.pohjolainen@gmail.com" target="_blank">topi.pohjolainen@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">Current estimate doesn't consider space needed for surface states<br>
and it only calculates for one shader stage. Each stage can have<br>
its own sampler and surface state configuration.<br>
<br>
While this is only a matter of runtime dynamics, we don't seem to hit<br>
it currently. However, this becomes visible with blorp tex uploads<br>
(HSW with piglit test max-samplers). One runs out of space while<br>
batch wrapping isn't allowed.<br>
<br>
</span>v2: Rebase on top of current upstream<br>
<span class=""><br>
Signed-off-by: Topi Pohjolainen <<a href="mailto:topi.pohjolainen@intel.com">topi.pohjolainen@intel.com</a>><br>
CC: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>><br>
CC: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br>
---<br>
</span> src/mesa/drivers/dri/i965/brw_<wbr>draw.c | 49 ++++++++++++++++++++++++++++++<wbr>+++---<br>
1 file changed, 46 insertions(+), 3 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_draw.c b/src/mesa/drivers/dri/i965/<wbr>brw_draw.c<br>
index 0db7311..83a9f33 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_draw.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_draw.c<br>
@@ -395,6 +395,51 @@ brw_postdraw_set_buffers_need_<wbr>resolve(struct brw_context *brw)<br>
<div><div class="h5"> }<br>
}<br>
<br>
+static unsigned<br>
+brw_get_num_active_samplers(<wbr>const struct gl_context *ctx,<br>
+ const struct gl_program *prog)<br>
+{<br>
+ const unsigned last = util_last_bit(prog-><wbr>SamplersUsed);<br>
+ unsigned count = 0;<br>
+<br>
+ for (unsigned s = 0; s < last; s++) {<br>
+ if (prog->SamplersUsed & (1 << s)) {<br>
+ const unsigned unit = prog->SamplerUnits[s];<br>
+ if (ctx->Texture.Unit[unit]._<wbr>Current)<br>
+ ++count;<br>
+ }<br>
+ }<br>
+<br>
+ return count;<br>
+}<br>
+<br>
+static unsigned<br>
+brw_estimate_batch_space_for_<wbr>textures(const struct brw_context *brw)<br>
+{<br>
+ const struct gl_context *ctx = &brw->ctx;<br>
+ unsigned total = 0;<br>
+<br>
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {<br>
</div></div>+ const struct gl_program *prog = ctx->_Shader->CurrentProgram[<wbr>i];<br>
+<br>
+ if (prog == NULL)<br>
+ continue;<br>
+<br>
<span class="">+ const unsigned num_samplers = brw_get_num_active_samplers(<wbr>ctx, prog);<br>
+ const unsigned sampler_needs_per_tex_unit =<br>
+ 16 /* sampler_state_size */ +<br>
+ sizeof(struct gen5_sampler_default_color);<br>
+ const unsigned surface_state_needs_per_tex_<wbr>unit =<br>
+ ALIGN(brw->isl_dev.ss.size, brw->isl_dev.ss.align) +<br>
+ 4 /* binding table pointer */;<br>
+ const unsigned total_per_tex_unit = sampler_needs_per_tex_unit +<br>
+ surface_state_needs_per_tex_<wbr>unit;<br>
+ total += (num_samplers * total_per_tex_unit);<br></span></blockquote><div><br></div><div>This isn't exactly correct. While it's true that a binding table entry only consumes 4 bytes, binding table sizes have to be rounded up to 64B (i.e. 16 entries), so if you have a number of samplers that is not a multiple of 16, this will underestimate the space required.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">
+ }<br>
+<br>
+ return total;<br>
+}<br>
+<br>
</span> /* May fail if out of video memory for texture or vbo upload, or on<br>
* fallback conditions.<br>
*/<br>
@@ -477,11 +522,9 @@ brw_try_draw_prims(struct gl_context *ctx,<br>
<div class="HOEnZb"><div class="h5"><br>
for (i = 0; i < nr_prims; i++) {<br>
int estimated_max_prim_size;<br>
- const int sampler_state_size = 16;<br>
<br>
estimated_max_prim_size = 512; /* batchbuffer commands */<br>
- estimated_max_prim_size += BRW_MAX_TEX_UNIT *<br>
- (sampler_state_size + sizeof(struct gen5_sampler_default_color));<br>
+ estimated_max_prim_size += brw_estimate_batch_space_for_<wbr>textures(brw);<br>
estimated_max_prim_size += 1024; /* gen6 VS push constants */<br>
estimated_max_prim_size += 1024; /* gen6 WM push constants */<br>
estimated_max_prim_size += 512; /* misc. pad */<br>
--<br>
2.5.5<br>
<br>
</div></div></blockquote></div><br></div></div>