<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Aug 23, 2016 at 8:50 AM, Pohjolainen, Topi <span dir="ltr"><<a href="mailto:topi.pohjolainen@gmail.com" target="_blank">topi.pohjolainen@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On Fri, Aug 19, 2016 at 09:55:54AM -0700, Jason Ekstrand wrote:<br>
> ---<br>
> src/mesa/drivers/dri/i965/<wbr>blorp.c | 66 -----------------------------<br>
> src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.c | 66 +++++++++++++++++++++++++++++<br>
> src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.h | 8 ++--<br>
> 3 files changed, 70 insertions(+), 70 deletions(-)<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/<wbr>blorp.c b/src/mesa/drivers/dri/i965/<wbr>blorp.c<br>
> index dba3441..0688f6b 100644<br>
> --- a/src/mesa/drivers/dri/i965/<wbr>blorp.c<br>
> +++ b/src/mesa/drivers/dri/i965/<wbr>blorp.c<br>
> @@ -347,28 +347,6 @@ brw_blorp_compile_nir_shader(<wbr>struct brw_context *brw, struct nir_shader *nir,<br>
> void<br>
> brw_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params)<br>
> {<br>
> - struct gl_context *ctx = &brw->ctx;<br>
> - const uint32_t estimated_max_batch_usage = brw->gen >= 8 ? 1800 : 1500;<br>
> - bool check_aperture_failed_once = false;<br>
> -<br>
> - /* Flush the sampler and render caches. We definitely need to flush the<br>
> - * sampler cache so that we get updated contents from the render cache for<br>
> - * the glBlitFramebuffer() source. Also, we are sometimes warned in the<br>
> - * docs to flush the cache between reinterpretations of the same surface<br>
> - * data with different formats, which blorp does for stencil and depth<br>
> - * data.<br>
> - */<br>
> - brw_emit_mi_flush(brw);<br>
> -<br>
> - brw_select_pipeline(brw, BRW_RENDER_PIPELINE);<br>
> -<br>
> -retry:<br>
> - intel_batchbuffer_require_<wbr>space(brw, estimated_max_batch_usage, RENDER_RING);<br>
> - intel_batchbuffer_save_state(<wbr>brw);<br>
> - drm_intel_bo *saved_bo = brw-><a href="http://batch.bo" rel="noreferrer" target="_blank">batch.bo</a>;<br>
> - uint32_t saved_used = USED_BATCH(brw->batch);<br>
> - uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;<br>
> -<br>
> switch (brw->gen) {<br>
> case 6:<br>
> gen6_blorp_exec(brw, params);<br>
> @@ -389,50 +367,6 @@ retry:<br>
> /* BLORP is not supported before Gen6. */<br>
> unreachable("not reached");<br>
> }<br>
> -<br>
> - /* Make sure we didn't wrap the batch unintentionally, and make sure we<br>
> - * reserved enough space that a wrap will never happen.<br>
> - */<br>
> - assert(brw-><a href="http://batch.bo" rel="noreferrer" target="_blank">batch.bo</a> == saved_bo);<br>
> - assert((USED_BATCH(brw->batch) - saved_used) * 4 +<br>
> - (saved_state_batch_offset - brw->batch.state_batch_offset) <<br>
> - estimated_max_batch_usage);<br>
> - /* Shut up compiler warnings on release build */<br>
> - (void)saved_bo;<br>
> - (void)saved_used;<br>
> - (void)saved_state_batch_<wbr>offset;<br>
> -<br>
> - /* Check if the blorp op we just did would make our batch likely to fail to<br>
> - * map all the BOs into the GPU at batch exec time later. If so, flush the<br>
> - * batch and try again with nothing else in the batch.<br>
> - */<br>
> - if (dri_bufmgr_check_aperture_<wbr>space(&brw-><a href="http://batch.bo" rel="noreferrer" target="_blank">batch.bo</a>, 1)) {<br>
> - if (!check_aperture_failed_once) {<br>
> - check_aperture_failed_once = true;<br>
> - intel_batchbuffer_reset_to_<wbr>saved(brw);<br>
> - intel_batchbuffer_flush(brw);<br>
> - goto retry;<br>
> - } else {<br>
> - int ret = intel_batchbuffer_flush(brw);<br>
> - WARN_ONCE(ret == -ENOSPC,<br>
> - "i965: blorp emit exceeded available aperture space\n");<br>
> - }<br>
> - }<br>
> -<br>
> - if (unlikely(brw->always_flush_<wbr>batch))<br>
> - intel_batchbuffer_flush(brw);<br>
> -<br>
> - /* We've smashed all state compared to what the normal 3D pipeline<br>
> - * rendering tracks for GL.<br>
> - */<br>
> - brw->ctx.NewDriverState |= BRW_NEW_BLORP;<br>
> - brw->no_depth_or_stencil = false;<br>
> - brw->ib.type = -1;<br>
> -<br>
> - /* Flush the sampler cache so any texturing from the destination is<br>
> - * coherent.<br>
> - */<br>
> - brw_emit_mi_flush(brw);<br>
> }<br>
><br>
> void<br>
> diff --git a/src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.c b/src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.c<br>
> index e07fa0a..9ba1f8a 100644<br>
> --- a/src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.c<br>
> +++ b/src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.c<br>
> @@ -170,6 +170,28 @@ void<br>
> genX(blorp_exec)(struct brw_context *brw,<br>
> const struct brw_blorp_params *params)<br>
> {<br>
> + struct gl_context *ctx = &brw->ctx;<br>
> + const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1800 : 1500;<br>
> + bool check_aperture_failed_once = false;<br>
> +<br>
> + /* Flush the sampler and render caches. We definitely need to flush the<br>
> + * sampler cache so that we get updated contents from the render cache for<br>
> + * the glBlitFramebuffer() source. Also, we are sometimes warned in the<br>
> + * docs to flush the cache between reinterpretations of the same surface<br>
> + * data with different formats, which blorp does for stencil and depth<br>
> + * data.<br>
> + */<br>
> + brw_emit_mi_flush(brw);<br>
> +<br>
> + brw_select_pipeline(brw, BRW_RENDER_PIPELINE);<br>
> +<br>
> +retry:<br>
> + intel_batchbuffer_require_<wbr>space(brw, estimated_max_batch_usage, RENDER_RING);<br>
> + intel_batchbuffer_save_state(<wbr>brw);<br>
> + drm_intel_bo *saved_bo = brw-><a href="http://batch.bo" rel="noreferrer" target="_blank">batch.bo</a>;<br>
> + uint32_t saved_used = USED_BATCH(brw->batch);<br>
> + uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;<br>
> +<br>
> #if GEN_GEN == 6<br>
> /* Emit workaround flushes when we switch from drawing to blorping. */<br>
> brw_emit_post_sync_nonzero_<wbr>flush(brw);<br>
> @@ -187,4 +209,48 @@ genX(blorp_exec)(struct brw_context *brw,<br>
> brw_emit_depth_stall_flushes(<wbr>brw);<br>
><br>
> blorp_exec(&brw->blorp, brw, params);<br>
> +<br>
> + /* Make sure we didn't wrap the batch unintentionally, and make sure we<br>
> + * reserved enough space that a wrap will never happen.<br>
> + */<br>
> + assert(brw-><a href="http://batch.bo" rel="noreferrer" target="_blank">batch.bo</a> == saved_bo);<br>
> + assert((USED_BATCH(brw->batch) - saved_used) * 4 +<br>
> + (saved_state_batch_offset - brw->batch.state_batch_offset) <<br>
> + estimated_max_batch_usage);<br>
> + /* Shut up compiler warnings on release build */<br>
> + (void)saved_bo;<br>
> + (void)saved_used;<br>
> + (void)saved_state_batch_<wbr>offset;<br>
> +<br>
> + /* Check if the blorp op we just did would make our batch likely to fail to<br>
> + * map all the BOs into the GPU at batch exec time later. If so, flush the<br>
> + * batch and try again with nothing else in the batch.<br>
> + */<br>
> + if (dri_bufmgr_check_aperture_<wbr>space(&brw-><a href="http://batch.bo" rel="noreferrer" target="_blank">batch.bo</a>, 1)) {<br>
> + if (!check_aperture_failed_once) {<br>
> + check_aperture_failed_once = true;<br>
> + intel_batchbuffer_reset_to_<wbr>saved(brw);<br>
> + intel_batchbuffer_flush(brw);<br>
> + goto retry;<br>
> + } else {<br>
> + int ret = intel_batchbuffer_flush(brw);<br>
> + WARN_ONCE(ret == -ENOSPC,<br>
> + "i965: blorp emit exceeded available aperture space\n");<br>
> + }<br>
> + }<br>
> +<br>
> + if (unlikely(brw->always_flush_<wbr>batch))<br>
> + intel_batchbuffer_flush(brw);<br>
> +<br>
> + /* We've smashed all state compared to what the normal 3D pipeline<br>
> + * rendering tracks for GL.<br>
> + */<br>
> + brw->ctx.NewDriverState |= BRW_NEW_BLORP;<br>
> + brw->no_depth_or_stencil = false;<br>
> + brw->ib.type = -1;<br>
> +<br>
> + /* Flush the sampler cache so any texturing from the destination is<br>
> + * coherent.<br>
> + */<br>
> + brw_emit_mi_flush(brw);<br>
> }<br>
> diff --git a/src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.h b/src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.h<br>
> index 02a0397..f7fbf04 100644<br>
> --- a/src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.h<br>
> +++ b/src/mesa/drivers/dri/i965/<wbr>genX_blorp_exec.h<br>
> @@ -204,7 +204,7 @@ blorp_emit_input_varying_data(<wbr>struct blorp_batch batch,<br>
> for (unsigned i = 0; i < max_num_varyings; i++) {<br>
> const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;<br>
><br>
> - if (!(params->wm_prog_data-><wbr>inputs_read & BITFIELD64_BIT(attr)))<br>
> + if (!(params->wm_prog_data-><wbr>inputs_read & (1ull << attr)))<br>
<br>
</div></div>This looks like it belongs in the previous patch (the type of inputs_read is<br>
already changed there).<br>
<span class=""><br>
> continue;<br>
><br>
> memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);<br>
> @@ -391,7 +391,7 @@ blorp_emit_sf_config(struct blorp_batch batch,<br>
> }<br>
><br>
> blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {<br>
> - sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;<br>
> + sbe.VertexURBEntryReadOffset = 1;<br>
<br>
</span>Dropping BRW_SF_URB_ENTRY_READ_OFFSET here and below doesn't seem to be directly<br>
related to this patch either.<br></blockquote><div><br></div><div>I pulled them out into a new commit called "remove some i965-isms from genX_blorp_exec.h"<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Otherwise this patch is:<br>
<br>
Reviewed-by: Topi Pohjolainen <<a href="mailto:topi.pohjolainen@intel.com">topi.pohjolainen@intel.com</a>><span class=""><br></span></blockquote><div><br></div><div>Thanks!<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">
> sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;<br>
> sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_<wbr>data);<br>
> sbe.<wbr>ForceVertexURBEntryReadLength = true;<br>
> @@ -419,7 +419,7 @@ blorp_emit_sf_config(struct blorp_batch batch,<br>
> }<br>
><br>
> blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {<br>
> - sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;<br>
> + sbe.VertexURBEntryReadOffset = 1;<br>
> if (prog_data) {<br>
> sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;<br>
> sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_<wbr>data);<br>
> @@ -439,7 +439,7 @@ blorp_emit_sf_config(struct blorp_batch batch,<br>
> sf.<wbr>MultisampleRasterizationMode = params->dst.surf.samples > 1 ?<br>
> MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;<br>
><br>
> - sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;<br>
> + sf.VertexURBEntryReadOffset = 1;<br>
> if (prog_data) {<br>
> sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;<br>
> sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_<wbr>data);<br>
> --<br>
> 2.5.0.400.gff86faf<br>
><br>
</span>> ______________________________<wbr>_________________<br>
> mesa-dev mailing list<br>
> <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
> <a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</blockquote></div><br></div></div>