<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Aug 18, 2016 at 6:27 AM, Pohjolainen, Topi <span dir="ltr"><<a href="mailto:topi.pohjolainen@intel.com" target="_blank">topi.pohjolainen@intel.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On Thu, Aug 11, 2016 at 02:15:21PM -0700, Jason Ekstrand wrote:<br>
> ---<br>
> src/mesa/drivers/dri/i965/<wbr>Makefile.am | 1 +<br>
> src/mesa/drivers/dri/i965/<wbr>gen6_blorp.c | 685 ++++++++++++------------------<wbr>---<br>
> 2 files changed, 256 insertions(+), 430 deletions(-)<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/<wbr>Makefile.am b/src/mesa/drivers/dri/i965/<wbr>Makefile.am<br>
> index 0a5222e..77ad1e8 100644<br>
> --- a/src/mesa/drivers/dri/i965/<wbr>Makefile.am<br>
> +++ b/src/mesa/drivers/dri/i965/<wbr>Makefile.am<br>
> @@ -36,6 +36,7 @@ AM_CFLAGS = \<br>
> -I$(top_srcdir)/src/compiler/<wbr>nir \<br>
> -I$(top_srcdir)/src/intel \<br>
> -I$(top_builddir)/src/<wbr>compiler/nir \<br>
> + -I$(top_builddir)/src/intel \<br>
> -I$(top_builddir)/src/mesa/<wbr>drivers/dri/common \<br>
> $(DEFINES) \<br>
> $(VISIBILITY_CFLAGS) \<br>
> diff --git a/src/mesa/drivers/dri/i965/<wbr>gen6_blorp.c b/src/mesa/drivers/dri/i965/<wbr>gen6_blorp.c<br>
> index 78e9472..875fdc9 100644<br>
> --- a/src/mesa/drivers/dri/i965/<wbr>gen6_blorp.c<br>
> +++ b/src/mesa/drivers/dri/i965/<wbr>gen6_blorp.c<br>
> @@ -27,469 +27,245 @@<br>
> #include "intel_mipmap_tree.h"<br>
><br>
> #include "brw_context.h"<br>
> -#include "brw_defines.h"<br>
> #include "brw_state.h"<br>
><br>
> #include "blorp_priv.h"<br>
> -#include "vbo/vbo.h"<br>
> -#include "brw_draw.h"<br>
><br>
> -/* 3DSTATE_URB<br>
> - *<br>
> - * Assign the entire URB to the VS. Even though the VS disabled, URB space<br>
> - * is still needed because the clipper loads the VUE's from the URB. From<br>
> - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,<br>
> - * Dword 1.15:0 "VS Number of URB Entries":<br>
> - * This field is always used (even if VS Function Enable is DISABLED).<br>
> - *<br>
> - * The warning below appears in the PRM (Section 3DSTATE_URB), but we can<br>
> - * safely ignore it because this batch contains only one draw call.<br>
> - * Because of URB corruption caused by allocating a previous GS unit<br>
</div></div>> - * URB entry to the VS unit, software is required to send a "GS NULL<br>
> - * Fence" (Send URB fence with VS URB size == 1 and GS URB size == 0)<br>
<span class="">> - * plus a dummy DRAW call before any case where VS will be taking over<br>
> - * GS URB space.<br>
> - */<br>
<br>
</span>In addition to changing the actual patch formatting you are dropping almost<br>
all the comments giving the rationale for the actual values. For example, here.<br>
I at least found them quite valuable when I was learning blorp.<br>
<br>
Just a few similar comments and tiny nits. I tried to carefully compare the<br>
original logic side-by-side with the new and couldn't see anything dropped or<br>
added:<br>
<br>
Reviewed-by: Topi Pohjolainen <<a href="mailto:topi.pohjolainen@intel.com">topi.pohjolainen@intel.com</a>><br>
<span class=""><br>
> -static void<br>
> -gen6_blorp_emit_urb_config(<wbr>struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> -{<br>
> - BEGIN_BATCH(3);<br>
> - OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));<br>
> - OUT_BATCH(brw->urb.max_vs_<wbr>entries << GEN6_URB_VS_ENTRIES_SHIFT);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> -}<br>
> +#define GEN_VERSIONx10 60<br>
> +#include "genxml/gen_macros.h"<br>
><br>
> -<br>
> -/* 3DSTATE_CC_STATE_POINTERS<br>
> - *<br>
> - * The pointer offsets are relative to<br>
> - * CMD_STATE_BASE_ADDRESS.<wbr>DynamicStateBaseAddress.<br>
> - *<br>
> - * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.<br>
<br>
</span>And here.<br>
<div><div class="h5"><br>
> - */<br>
> -static void<br>
> -gen6_blorp_emit_cc_state_<wbr>pointers(struct brw_context *brw,<br>
> - const struct brw_blorp_params *params,<br>
> - uint32_t cc_blend_state_offset,<br>
> - uint32_t depthstencil_offset,<br>
> - uint32_t cc_state_offset)<br>
> +static void *<br>
> +blorp_emit_dwords(struct brw_context *brw, unsigned n)<br>
> {<br>
> - BEGIN_BATCH(4);<br>
> - OUT_BATCH(_3DSTATE_CC_STATE_<wbr>POINTERS << 16 | (4 - 2));<br>
> - OUT_BATCH(cc_blend_state_<wbr>offset | 1); /* BLEND_STATE offset */<br>
> - OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */<br>
> - OUT_BATCH(cc_state_offset | 1); /* COLOR_CALC_STATE offset */<br>
> - ADVANCE_BATCH();<br>
> + intel_batchbuffer_begin(brw, n, RENDER_RING);<br>
> + uint32_t *map = brw->batch.map_next;<br>
> + brw->batch.map_next += n;<br>
> + intel_batchbuffer_advance(brw)<wbr>;<br>
> + return map;<br>
> }<br>
><br>
> +struct blorp_address {<br>
> + drm_intel_bo *buffer;<br>
> + uint32_t read_domains;<br>
> + uint32_t write_domain;<br>
> + uint32_t offset;<br>
> +};<br>
><br>
> -/**<br>
> - * 3DSTATE_SAMPLER_STATE_<wbr>POINTERS. See upload_sampler_state_pointers(<wbr>).<br>
> - */<br>
> -static void<br>
> -gen6_blorp_emit_sampler_<wbr>state_pointers(struct brw_context *brw,<br>
> - uint32_t sampler_offset)<br>
> +static uint64_t<br>
> +blorp_emit_reloc(struct brw_context *brw, void *location,<br>
> + struct blorp_address address, uint32_t delta)<br>
> {<br>
> - BEGIN_BATCH(4);<br>
> - OUT_BATCH(_3DSTATE_SAMPLER_<wbr>STATE_POINTERS << 16 |<br>
> - VS_SAMPLER_STATE_CHANGE |<br>
> - GS_SAMPLER_STATE_CHANGE |<br>
> - PS_SAMPLER_STATE_CHANGE |<br>
> - (4 - 2));<br>
> - OUT_BATCH(0); /* VS */<br>
> - OUT_BATCH(0); /* GS */<br>
> - OUT_BATCH(sampler_offset);<br>
> - ADVANCE_BATCH();<br>
> + uint32_t offset = (char *)location - (char *)brw->batch.map;<br>
> + if (brw->gen >= 8) {<br>
> + return intel_batchbuffer_reloc64(brw, address.buffer, offset,<br>
> + address.read_domains,<br>
> + address.write_domain,<br>
> + address.offset + delta);<br>
> + } else {<br>
> + return intel_batchbuffer_reloc(brw, address.buffer, offset,<br>
> + address.read_domains,<br>
> + address.write_domain,<br>
> + address.offset + delta);<br>
> + }<br>
> }<br>
><br>
> +#define __gen_address_type struct blorp_address<br>
> +#define __gen_user_data struct brw_context<br>
><br>
> -/* 3DSTATE_VS<br>
> - *<br>
> - * Disable vertex shader.<br>
> - */<br>
> -static void<br>
> -gen6_blorp_emit_vs_disable(<wbr>struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> +static uint64_t<br>
> +__gen_combine_address(struct brw_context *brw, void *location,<br>
> + struct blorp_address address, uint32_t delta)<br>
> {<br>
> - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,<br>
> - * 3DSTATE_VS, Dword 5.0 "VS Function Enable":<br>
> - *<br>
> - * [DevSNB] A pipeline flush must be programmed prior to a<br>
> - * 3DSTATE_VS command that causes the VS Function Enable to<br>
> - * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL<br>
> - * command with CS stall bit set and a post sync operation.<br>
> - *<br>
> - * We've already done one at the start of the BLORP operation.<br>
> - */<br>
<br>
</div></div>I would keep this.<br>
<div><div class="h5"><br>
> -<br>
> - BEGIN_BATCH(6);<br>
> - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> + if (address.buffer == NULL) {<br>
> + return address.offset + delta;<br>
> + } else {<br>
> + return blorp_emit_reloc(brw, location, address, delta);<br>
> + }<br>
> }<br>
><br>
> +#include "genxml/genX_pack.h"<br>
><br>
> -/* 3DSTATE_GS<br>
> - *<br>
> - * Disable the geometry shader.<br>
> - */<br>
> -static void<br>
> -gen6_blorp_emit_gs_disable(<wbr>struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> -{<br>
> - BEGIN_BATCH(7);<br>
> - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> - brw->gs.enabled = false;<br>
> -}<br>
> +#define _blorp_cmd_length(cmd) cmd ## _length<br>
> +#define _blorp_cmd_header(cmd) cmd ## _header<br>
> +#define _blorp_cmd_pack(cmd) cmd ## _pack<br>
><br>
> +#define blorp_emit(brw, cmd, name) \<br>
> + for (struct cmd name = { _blorp_cmd_header(cmd) }, \<br>
> + *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd)); \<br>
> + __builtin_expect(_dst != NULL, 1); \<br>
> + _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name), \<br>
> + _dst = NULL)<br>
><br>
> -/* 3DSTATE_SF<br>
> - *<br>
> - * Disable ViewportTransformEnable (dw2.1)<br>
> - *<br>
> - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D<br>
> - * Primitives Overview":<br>
> - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the<br>
> - * use of screen- space coordinates).<br>
> - *<br>
> - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)<br>
> - * and BackFaceFillMode (dw2.5:6) to SOLID(0).<br>
> - *<br>
> - * From the Sandy Bridge PRM, Volume 2, Part 1, Section<br>
> - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:<br>
> - * SOLID: Any triangle or rectangle object found to be front-facing<br>
> - * is rendered as a solid object. This setting is required when<br>
> - * (rendering rectangle (RECTLIST) objects.<br>
> - */<br>
<br>
</div></div>I would keep this.<br>
<div><div class="h5"><br>
> static void<br>
> -gen6_blorp_emit_sf_config(<wbr>struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> +blorp_emit_sf_config(struct brw_context *brw,<br>
> + const struct brw_blorp_params *params)<br>
> {<br>
> - const unsigned num_varyings =<br>
> - params->wm_prog_data ? params->wm_prog_data->num_<wbr>varying_inputs : 0;<br>
> - const unsigned urb_read_length =<br>
> - brw_blorp_get_urb_length(<wbr>params->wm_prog_data);<br>
> -<br>
> - BEGIN_BATCH(20);<br>
> - OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));<br>
> - OUT_BATCH(num_varyings << GEN6_SF_NUM_OUTPUTS_SHIFT |<br>
> - urb_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_<wbr>SHIFT |<br>
> - BRW_SF_URB_ENTRY_READ_OFFSET <<<br>
> - GEN6_SF_URB_ENTRY_READ_OFFSET_<wbr>SHIFT);<br>
> - OUT_BATCH(0); /* dw2 */<br>
> - OUT_BATCH(params->dst.surf.<wbr>samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);<br>
> - for (int i = 0; i < 13; ++i)<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(params->wm_prog_data ? params->wm_prog_data->flat_<wbr>inputs : 0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> -}<br>
> + const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;<br>
><br>
> + blorp_emit(brw, GENX(3DSTATE_SF), sf) {<br>
> + sf.FrontFaceFillMode = FILL_MODE_SOLID;<br>
> + sf.BackFaceFillMode = FILL_MODE_SOLID;<br>
> +<br>
> + sf.<wbr>MultisampleRasterizationMode = params->dst.surf.samples > 1 ?<br>
> + MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;<br>
> +<br>
> + sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;<br>
> + if (prog_data) {<br>
> + sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;<br>
> + sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_<wbr>data);<br>
> + sf.ConstantInterpolationEnable = prog_data->flat_inputs;<br>
> + } else {<br>
> + sf.NumberofSFOutputAttributes = 0;<br>
<br>
</div></div>In almost all other cases you omit assigning values if they are zero. Should<br>
we drop this also for consistency?<span class=""><br></span></blockquote><div><br></div><div>Yes and no. In general, 0 is a good enough "default" value and you might as well leave it alone. In this particular case, I set NumberofSFOutputAttributes explicitly because its value is important. On the other hand, the value of ConstantInterpolationEnable doesn't matter because we're disabling the FS in this case.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">
> + sf.VertexURBEntryReadLength = 1;<br>
> + }<br>
> + }<br>
> +}<br>
><br>
> -/**<br>
> - * Enable or disable thread dispatch and set the HiZ op appropriately.<br>
> - */<br>
> static void<br>
> -gen6_blorp_emit_wm_config(<wbr>struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> +blorp_emit_wm_config(struct brw_context *brw,<br>
> + const struct brw_blorp_params *params)<br>
> {<br>
> const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;<br>
> - uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;<br>
> -<br>
> - /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be<br>
> - * nonzero to prevent the GPU from hanging. While the documentation doesn't<br>
> - * mention this explicitly, it notes that the valid range for the field is<br>
> - * [1,39] = [2,40] threads, which excludes zero.<br>
> - *<br>
> - * To be safe (and to minimize extraneous code) we go ahead and fully<br>
> - * configure the WM state whether or not there is a WM program.<br>
> - */<br>
<br>
</span>I would keep this.<br>
<div><div class="h5"><br>
> -<br>
> - dw2 = dw4 = dw5 = dw6 = ksp0 = ksp2 = 0;<br>
> - switch (params->hiz_op) {<br>
> - case GEN6_HIZ_OP_DEPTH_CLEAR:<br>
> - dw4 |= GEN6_WM_DEPTH_CLEAR;<br>
> - break;<br>
> - case GEN6_HIZ_OP_DEPTH_RESOLVE:<br>
> - dw4 |= GEN6_WM_DEPTH_RESOLVE;<br>
> - break;<br>
> - case GEN6_HIZ_OP_HIZ_RESOLVE:<br>
> - dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_<wbr>RESOLVE;<br>
> - break;<br>
> - case GEN6_HIZ_OP_NONE:<br>
> - break;<br>
> - default:<br>
> - unreachable("not reached");<br>
> - }<br>
> - dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;<br>
> - dw6 |= 0 << GEN6_WM_BARYCENTRIC_<wbr>INTERPOLATION_MODE_SHIFT; /* No interp */<br>
> - dw6 |= (params->wm_prog_data ? prog_data->num_varying_inputs : 0) <<<br>
> - GEN6_WM_NUM_SF_OUTPUTS_SHIFT;<br>
><br>
> - if (params->wm_prog_data) {<br>
> - dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */<br>
> + blorp_emit(brw, GENX(3DSTATE_WM), wm) {<br>
> + wm.MaximumNumberofThreads = brw->max_wm_threads - 1;<br>
> +<br>
> + switch (params->hiz_op) {<br>
> + case GEN6_HIZ_OP_DEPTH_CLEAR:<br>
> + wm.DepthBufferClear = true;<br>
> + break;<br>
> + case GEN6_HIZ_OP_DEPTH_RESOLVE:<br>
> + wm.DepthBufferResolveEnable = true;<br>
> + break;<br>
> + case GEN6_HIZ_OP_HIZ_RESOLVE:<br>
> + wm.<wbr>HierarchicalDepthBufferResolve<wbr>Enable = true;<br>
> + break;<br>
> + case GEN6_HIZ_OP_NONE:<br>
> + break;<br>
> + default:<br>
> + unreachable("not reached");<br>
> + }<br>
><br>
> - dw4 |= prog_data->first_curbe_grf_0 << GEN6_WM_DISPATCH_START_GRF_<wbr>SHIFT_0;<br>
> - dw4 |= prog_data->first_curbe_grf_2 << GEN6_WM_DISPATCH_START_GRF_<wbr>SHIFT_2;<br>
> + if (prog_data) {<br>
> + wm.ThreadDispatchEnable = true;<br>
><br>
> - ksp0 = params->wm_prog_kernel;<br>
> - ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_<wbr>offset_2;<br>
> + wm.<wbr>DispatchGRFStartRegisterforCon<wbr>stantSetupData0 =<br>
> + prog_data->first_curbe_grf_0;<br>
> + wm.<wbr>DispatchGRFStartRegisterforCon<wbr>stantSetupData2 =<br>
> + prog_data->first_curbe_grf_2;<br>
><br>
> - if (params->wm_prog_data-><wbr>dispatch_8)<br>
> - dw5 |= GEN6_WM_8_DISPATCH_ENABLE;<br>
> - if (params->wm_prog_data-><wbr>dispatch_16)<br>
> - dw5 |= GEN6_WM_16_DISPATCH_ENABLE;<br>
> - }<br>
> + wm.KernelStartPointer0 = params->wm_prog_kernel;<br>
> + wm.KernelStartPointer2 =<br>
> + params->wm_prog_kernel + prog_data->ksp_offset_2;<br>
><br>
> - if (params-><a href="http://src.bo" rel="noreferrer" target="_blank">src.bo</a>) {<br>
> - dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */<br>
> - dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */<br>
> - }<br>
> + wm._8PixelDispatchEnable = prog_data->dispatch_8;<br>
> + wm._16PixelDispatchEnable = prog_data->dispatch_16;<br>
><br>
> - if (params->dst.surf.samples > 1) {<br>
> - dw6 |= GEN6_WM_MSRAST_ON_PATTERN;<br>
> - if (prog_data && prog_data->persample_msaa_<wbr>dispatch)<br>
> - dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;<br>
> - else<br>
> - dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;<br>
> - } else {<br>
> - dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;<br>
> - dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;<br>
> - }<br>
> -<br>
> - BEGIN_BATCH(9);<br>
> - OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));<br>
> - OUT_BATCH(ksp0);<br>
> - OUT_BATCH(dw2);<br>
> - OUT_BATCH(0); /* No scratch needed */<br>
> - OUT_BATCH(dw4);<br>
> - OUT_BATCH(dw5);<br>
> - OUT_BATCH(dw6);<br>
> - OUT_BATCH(0); /* kernel 1 pointer */<br>
> - OUT_BATCH(ksp2);<br>
> - ADVANCE_BATCH();<br>
> -}<br>
> + wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;<br>
> + }<br>
><br>
> -static void<br>
> -gen6_blorp_emit_constant_<wbr>disable(struct brw_context *brw, unsigned opcode)<br>
> -{<br>
> - /* Disable the push constant buffers. */<br>
> - BEGIN_BATCH(5);<br>
> - OUT_BATCH(opcode << 16 | (5 - 2));<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> -}<br>
> + if (params-><a href="http://src.bo" rel="noreferrer" target="_blank">src.bo</a>) {<br>
> + wm.SamplerCount = 1; /* Up to 4 samplers */<br>
> + wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */<br>
> + }<br>
><br>
> -/**<br>
> - * 3DSTATE_BINDING_TABLE_POINTERS<br>
> - */<br>
> -static void<br>
> -gen6_blorp_emit_binding_<wbr>table_pointers(struct brw_context *brw,<br>
> - uint32_t wm_bind_bo_offset)<br>
> -{<br>
> - BEGIN_BATCH(4);<br>
> - OUT_BATCH(_3DSTATE_BINDING_<wbr>TABLE_POINTERS << 16 |<br>
> - GEN6_BINDING_TABLE_MODIFY_PS |<br>
> - (4 - 2));<br>
> - OUT_BATCH(0); /* vs -- ignored */<br>
> - OUT_BATCH(0); /* gs -- ignored */<br>
> - OUT_BATCH(wm_bind_bo_offset); /* wm/ps */<br>
> - ADVANCE_BATCH();<br>
> + if (params->dst.surf.samples > 1) {<br>
> + wm.<wbr>MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;<br>
> + wm.MultisampleDispatchMode =<br>
> + (prog_data && prog_data->persample_msaa_<wbr>dispatch) ?<br>
> + MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;<br>
> + } else {<br>
> + wm.<wbr>MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;<br>
> + wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;<br>
> + }<br>
> + }<br>
> }<br>
><br>
><br>
> static void<br>
> -gen6_blorp_emit_depth_<wbr>stencil_config(struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> +blorp_emit_depth_stencil_<wbr>config(struct brw_context *brw,<br>
> + const struct brw_blorp_params *params)<br>
> {<br>
> - uint32_t surftype;<br>
> -<br>
> - switch (params->depth.surf.dim) {<br>
> - case ISL_SURF_DIM_1D:<br>
> - surftype = BRW_SURFACE_1D;<br>
> - break;<br>
> - case ISL_SURF_DIM_2D:<br>
> - surftype = BRW_SURFACE_2D;<br>
> - break;<br>
> - case ISL_SURF_DIM_3D:<br>
> - surftype = BRW_SURFACE_3D;<br>
> - break;<br>
> - }<br>
> + brw_emit_depth_stall_flushes(<wbr>brw);<br>
><br>
> - /* 3DSTATE_DEPTH_BUFFER */<br>
> - {<br>
> - brw_emit_depth_stall_flushes(<wbr>brw);<br>
> + blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {<br>
> + switch (params->depth.surf.dim) {<br>
> + case ISL_SURF_DIM_1D:<br>
> + db.SurfaceType = SURFTYPE_1D;<br>
> + break;<br>
> + case ISL_SURF_DIM_2D:<br>
> + db.SurfaceType = SURFTYPE_2D;<br>
> + break;<br>
> + case ISL_SURF_DIM_3D:<br>
> + db.SurfaceType = SURFTYPE_3D;<br>
> + break;<br>
> + }<br>
><br>
> - unsigned depth = MAX2(params->depth.surf.<wbr>logical_level0_px.depth,<br>
> - params->depth.surf.logical_<wbr>level0_px.array_len);<br>
> -<br>
> - BEGIN_BATCH(7);<br>
> - /* 3DSTATE_DEPTH_BUFFER dw0 */<br>
> - OUT_BATCH(_3DSTATE_DEPTH_<wbr>BUFFER << 16 | (7 - 2));<br>
> -<br>
> - /* 3DSTATE_DEPTH_BUFFER dw1 */<br>
> - OUT_BATCH((params->depth.surf.<wbr>row_pitch - 1) |<br>
> - params->depth_format << 18 |<br>
> - 1 << 21 | /* separate stencil enable */<br>
> - 1 << 22 | /* hiz enable */<br>
> - BRW_TILEWALK_YMAJOR << 26 |<br>
> - 1 << 27 | /* y-tiled */<br>
> - surftype << 29);<br>
> -<br>
> - /* 3DSTATE_DEPTH_BUFFER dw2 */<br>
> - OUT_RELOC(params-><a href="http://depth.bo" rel="noreferrer" target="_blank">depth.bo</a>,<br>
> - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,<br>
> - params->depth.offset);<br>
> -<br>
> - /* 3DSTATE_DEPTH_BUFFER dw3 */<br>
> - OUT_BATCH(BRW_SURFACE_<wbr>MIPMAPLAYOUT_BELOW << 1 |<br>
> - (params->depth.surf.logical_<wbr>level0_px.width - 1) << 6 |<br>
> - (params->depth.surf.logical_<wbr>level0_px.height - 1) << 19 |<br>
> - params->depth.view.base_level << 2);<br>
> -<br>
> - /* 3DSTATE_DEPTH_BUFFER dw4 */<br>
> - OUT_BATCH((depth - 1) << 21 |<br>
> - params->depth.view.base_array_<wbr>layer << 10 |<br>
> - (depth - 1) << 1);<br>
> -<br>
> - /* 3DSTATE_DEPTH_BUFFER dw5 */<br>
> - OUT_BATCH(0);<br>
> -<br>
> - /* 3DSTATE_DEPTH_BUFFER dw6 */<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> - }<br>
> + db.SurfaceFormat = params->depth_format;<br>
><br>
> - /* 3DSTATE_HIER_DEPTH_BUFFER */<br>
> - {<br>
> - BEGIN_BATCH(3);<br>
> - OUT_BATCH((_3DSTATE_HIER_<wbr>DEPTH_BUFFER << 16) | (3 - 2));<br>
> - OUT_BATCH(params->depth.aux_<wbr>surf.row_pitch - 1);<br>
> - OUT_RELOC(params->depth.aux_<wbr>bo,<br>
> - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,<br>
> - params->depth.aux_offset);<br>
> - ADVANCE_BATCH();<br>
> - }<br>
> + db.TiledSurface = true;<br>
> + db.TileWalk = TILEWALK_YMAJOR;<br>
> + db.MIPMapLayoutMode = MIPLAYOUT_BELOW;<br>
><br>
> - /* 3DSTATE_STENCIL_BUFFER */<br>
> - {<br>
> - BEGIN_BATCH(3);<br>
> - OUT_BATCH((_3DSTATE_STENCIL_<wbr>BUFFER << 16) | (3 - 2));<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> - }<br>
> -}<br>
> + db.<wbr>HierarchicalDepthBufferEnable = true;<br>
> + db.SeparateStencilBufferEnable = true;<br>
><br>
> + db.Width = params->depth.surf.logical_<wbr>level0_px.width - 1;<br>
> + db.Height = params->depth.surf.logical_<wbr>level0_px.height - 1;<br>
> + db.RenderTargetViewExtent = db.Depth =<br>
> + MAX2(params->depth.surf.<wbr>logical_level0_px.depth,<br>
> + params->depth.surf.logical_<wbr>level0_px.array_len) - 1;<br>
><br>
> -static void<br>
> -gen6_blorp_emit_depth_<wbr>disable(struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> -{<br>
> - brw_emit_depth_stall_flushes(<wbr>brw);<br>
> + db.LOD = params->depth.view.base_level;<br>
> + db.MinimumArrayElement = params->depth.view.base_array_<wbr>layer;<br>
><br>
> - BEGIN_BATCH(7);<br>
> - OUT_BATCH(_3DSTATE_DEPTH_<wbr>BUFFER << 16 | (7 - 2));<br>
> - OUT_BATCH((BRW_DEPTHFORMAT_<wbr>D32_FLOAT << 18) |<br>
> - (BRW_SURFACE_NULL << 29));<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> -<br>
> - BEGIN_BATCH(3);<br>
> - OUT_BATCH(_3DSTATE_HIER_DEPTH_<wbr>BUFFER << 16 | (3 - 2));<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> -<br>
> - BEGIN_BATCH(3);<br>
> - OUT_BATCH(_3DSTATE_STENCIL_<wbr>BUFFER << 16 | (3 - 2));<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> -}<br>
> + db.SurfacePitch = params->depth.surf.row_pitch - 1;<br>
> + db.SurfaceBaseAddress = (struct blorp_address) {<br>
> + .buffer = params-><a href="http://depth.bo" rel="noreferrer" target="_blank">depth.bo</a>,<br>
> + .read_domains = I915_GEM_DOMAIN_RENDER,<br>
> + .write_domain = I915_GEM_DOMAIN_RENDER,<br>
> + .offset = params->depth.offset,<br>
> + };<br>
> + }<br>
><br>
> + blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_<wbr>BUFFER), hiz) {<br>
> + hiz.SurfacePitch = params->depth.aux_surf.row_<wbr>pitch - 1;<br>
> + hiz.SurfaceBaseAddress = (struct blorp_address) {<br>
> + .buffer = params->depth.aux_bo,<br>
> + .read_domains = I915_GEM_DOMAIN_RENDER,<br>
> + .write_domain = I915_GEM_DOMAIN_RENDER,<br>
> + .offset = params->depth.aux_offset,<br>
> + };<br>
> + }<br>
><br>
> -/* 3DSTATE_CLEAR_PARAMS<br>
> - *<br>
> - * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:<br>
> - * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE<br>
> - * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.<br>
> - */<br>
<br>
</div></div>I would keep this.<br>
<div><div class="h5"><br>
> -static void<br>
> -gen6_blorp_emit_clear_params(<wbr>struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> -{<br>
> - BEGIN_BATCH(2);<br>
> - OUT_BATCH(_3DSTATE_CLEAR_<wbr>PARAMS << 16 |<br>
> - GEN5_DEPTH_CLEAR_VALID |<br>
> - (2 - 2));<br>
> - OUT_BATCH(params->depth.clear_<wbr>color.u32[0]);<br>
> - ADVANCE_BATCH();<br>
> + blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);<br>
> }<br>
><br>
> +<br>
> /* 3DSTATE_VIEWPORT_STATE_<wbr>POINTERS */<br>
> static void<br>
> -gen6_blorp_emit_viewport_<wbr>state(struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> +blorp_emit_viewport_state(<wbr>struct brw_context *brw,<br>
> + const struct brw_blorp_params *params)<br>
> {<br>
> - struct brw_cc_viewport *ccv;<br>
> uint32_t cc_vp_offset;<br>
><br>
> - ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,<br>
> - sizeof(*ccv), 32,<br>
> - &cc_vp_offset);<br>
> + void *state = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,<br>
> + GENX(CC_VIEWPORT_length) * 4, 32,<br>
> + &cc_vp_offset);<br>
><br>
> - ccv->min_depth = 0.0;<br>
> - ccv->max_depth = 1.0;<br>
> + GENX(CC_VIEWPORT_pack)(brw, state,<br>
> + &(struct GENX(CC_VIEWPORT)) {<br>
> + .MinimumDepth = 0.0,<br>
<br>
</div></div>Perhaps drop also and rely on initialised zero?<br></blockquote><div><br></div><div>Again, this value matters so I don't want to depend too heavily on automatic zeroing.<br><br></div><div>I put the comments back in.<br><br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div><div class="h5">
> + .MaximumDepth = 1.0,<br>
> + });<br>
><br>
> - BEGIN_BATCH(4);<br>
> - OUT_BATCH(_3DSTATE_VIEWPORT_<wbr>STATE_POINTERS << 16 | (4 - 2) |<br>
> - GEN6_CC_VIEWPORT_MODIFY);<br>
> - OUT_BATCH(0); /* clip VP */<br>
> - OUT_BATCH(0); /* SF VP */<br>
> - OUT_BATCH(cc_vp_offset);<br>
> - ADVANCE_BATCH();<br>
> + blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_<wbr>POINTERS), vsp) {<br>
> + vsp.CCViewportStateChange = true;<br>
> + vsp.PointertoCC_VIEWPORT = cc_vp_offset;<br>
> + }<br>
> }<br>
><br>
><br>
> -/* 3DPRIMITIVE */<br>
> -static void<br>
> -gen6_blorp_emit_primitive(<wbr>struct brw_context *brw,<br>
> - const struct brw_blorp_params *params)<br>
> -{<br>
> - BEGIN_BATCH(6);<br>
> - OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |<br>
> - _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_<wbr>SHIFT |<br>
> - GEN4_3DPRIM_VERTEXBUFFER_<wbr>ACCESS_SEQUENTIAL);<br>
> - OUT_BATCH(3); /* vertex count per instance */<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(params->num_layers); /* instance count */<br>
> - OUT_BATCH(0);<br>
> - OUT_BATCH(0);<br>
> - ADVANCE_BATCH();<br>
> -}<br>
> -<br>
> /**<br>
> * \brief Execute a blit or render pass operation.<br>
> *<br>
> @@ -514,18 +290,29 @@ gen6_blorp_exec(struct brw_context *brw,<br>
> brw_upload_state_base_address(<wbr>brw);<br>
><br>
> gen6_blorp_emit_vertices(brw, params);<br>
> - gen6_blorp_emit_urb_config(<wbr>brw, params);<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_URB), urb) {<br>
> + urb.VSNumberofURBEntries = brw->urb.max_vs_entries;<br>
> + }<br>
> +<br>
> if (params->wm_prog_data) {<br>
> cc_blend_state_offset = gen6_blorp_emit_blend_state(<wbr>brw, params);<br>
> cc_state_offset = gen6_blorp_emit_cc_state(brw);<br>
> }<br>
> depthstencil_offset = gen6_blorp_emit_depth_stencil_<wbr>state(brw, params);<br>
> - gen6_blorp_emit_cc_state_<wbr>pointers(brw, params, cc_blend_state_offset,<br>
> - depthstencil_offset, cc_state_offset);<br>
><br>
> - gen6_blorp_emit_constant_<wbr>disable(brw, _3DSTATE_CONSTANT_VS);<br>
> - gen6_blorp_emit_constant_<wbr>disable(brw, _3DSTATE_CONSTANT_GS);<br>
> - gen6_blorp_emit_constant_<wbr>disable(brw, _3DSTATE_CONSTANT_PS);<br>
> + blorp_emit(brw, GENX(3DSTATE_CC_STATE_<wbr>POINTERS), cc) {<br>
> + cc.BLEND_STATEChange = true;<br>
> + cc.COLOR_CALC_STATEChange = true;<br>
> + cc.DEPTH_STENCIL_STATEChange = true;<br>
> + cc.PointertoBLEND_STATE = cc_blend_state_offset;<br>
> + cc.PointertoCOLOR_CALC_STATE = cc_state_offset;<br>
> + cc.PointertoDEPTH_STENCIL_<wbr>STATE = depthstencil_offset;<br>
> + }<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs);<br>
> + blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs);<br>
> + blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps);<br>
><br>
> if (params->wm_prog_data) {<br>
> uint32_t wm_surf_offset_renderbuffer;<br>
> @@ -544,32 +331,70 @@ gen6_blorp_exec(struct brw_context *brw,<br>
> gen6_blorp_emit_binding_table(<wbr>brw,<br>
> wm_surf_offset_renderbuffer,<br>
> wm_surf_offset_texture);<br>
> - gen6_blorp_emit_binding_table_<wbr>pointers(brw, wm_bind_bo_offset);<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_<wbr>POINTERS), bt) {<br>
> + bt.PSBindingTableChange = true;<br>
> + bt.PointertoPSBindingTable = wm_bind_bo_offset;<br>
> + }<br>
> }<br>
><br>
> if (params-><a href="http://src.bo" rel="noreferrer" target="_blank">src.bo</a>) {<br>
> const uint32_t sampler_offset =<br>
> - gen6_blorp_emit_sampler_state(<wbr>brw, BRW_MAPFILTER_LINEAR, 0, true);<br>
> - gen6_blorp_emit_sampler_state_<wbr>pointers(brw, sampler_offset);<br>
> + gen6_blorp_emit_sampler_state(<wbr>brw, MAPFILTER_LINEAR, 0, true);<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_<wbr>POINTERS), ssp) {<br>
> + ssp.VSSamplerStateChange = true;<br>
> + ssp.GSSamplerStateChange = true;<br>
> + ssp.PSSamplerStateChange = true;<br>
> + ssp.PointertoPSSamplerState = sampler_offset;<br>
> + }<br>
> }<br>
><br>
> gen6_emit_3dstate_multisample(<wbr>brw, params->dst.surf.samples);<br>
> - gen6_emit_3dstate_sample_mask(<wbr>brw,<br>
> - params->dst.surf.samples > 1 ?<br>
> - (1 << params->dst.surf.samples) - 1 : 1);<br>
> -<br>
> - gen6_blorp_emit_vs_disable(<wbr>brw, params);<br>
> - gen6_blorp_emit_gs_disable(<wbr>brw, params);<br>
> - gen6_blorp_emit_clip_disable(<wbr>brw);<br>
> - gen6_blorp_emit_sf_config(brw, params);<br>
> - gen6_blorp_emit_wm_config(brw, params);<br>
> - gen6_blorp_emit_viewport_<wbr>state(brw, params);<br>
> -<br>
> - if (params-><a href="http://depth.bo" rel="noreferrer" target="_blank">depth.bo</a>)<br>
> - gen6_blorp_emit_depth_stencil_<wbr>config(brw, params);<br>
> - else<br>
> - gen6_blorp_emit_depth_disable(<wbr>brw, params);<br>
> - gen6_blorp_emit_clear_params(<wbr>brw, params);<br>
> - gen6_blorp_emit_drawing_<wbr>rectangle(brw, params);<br>
> - gen6_blorp_emit_primitive(brw, params);<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {<br>
> + mask.SampleMask = (1 << params->dst.surf.samples) - 1;<br>
> + }<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_VS), vs);<br>
> + blorp_emit(brw, GENX(3DSTATE_GS), gs);<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_CLIP), clip) {<br>
> + clip.PerspectiveDivideDisable = true;<br>
> + }<br>
> +<br>
> + blorp_emit_sf_config(brw, params);<br>
> + blorp_emit_wm_config(brw, params);<br>
> +<br>
> + blorp_emit_viewport_state(brw, params);<br>
> +<br>
> + if (params-><a href="http://depth.bo" rel="noreferrer" target="_blank">depth.bo</a>) {<br>
> + blorp_emit_depth_stencil_<wbr>config(brw, params);<br>
> + } else {<br>
> + brw_emit_depth_stall_flushes(<wbr>brw);<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {<br>
> + db.SurfaceType = SURFTYPE_NULL;<br>
> + db.SurfaceFormat = D32_FLOAT;<br>
> + }<br>
> + blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_<wbr>BUFFER), hiz);<br>
> + blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);<br>
> + }<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) {<br>
> + clear.DepthClearValueValid = true;<br>
> + clear.DepthClearValue = params->depth.clear_color.u32[<wbr>0];<br>
> + }<br>
> +<br>
> + blorp_emit(brw, GENX(3DSTATE_DRAWING_<wbr>RECTANGLE), rect) {<br>
> + rect.<wbr>ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;<br>
> + rect.<wbr>ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;<br>
> + }<br>
> +<br>
> + blorp_emit(brw, GENX(3DPRIMITIVE), prim) {<br>
> + prim.VertexAccessType = SEQUENTIAL;<br>
> + prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;<br>
> + prim.VertexCountPerInstance = 3;<br>
> + prim.InstanceCount = params->num_layers;<br>
> + }<br>
> }<br>
> --<br>
> 2.5.0.400.gff86faf<br>
><br>
</div></div>> ______________________________<wbr>_________________<br>
> mesa-dev mailing list<br>
> <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
> <a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</blockquote></div><br></div></div>