[Mesa-dev] [PATCH 16/31] i965/blorp: Pull the guts of blorp_exec into a driver-agnostic header

Jason Ekstrand jason at jlekstrand.net
Tue Aug 23 23:30:56 UTC 2016


On Tue, Aug 23, 2016 at 8:28 AM, Pohjolainen, Topi <
topi.pohjolainen at gmail.com> wrote:

> On Fri, Aug 19, 2016 at 09:55:53AM -0700, Jason Ekstrand wrote:
> > ---
> >  src/mesa/drivers/dri/i965/Makefile.sources  |   15 +-
> >  src/mesa/drivers/dri/i965/blorp_priv.h      |    2 +-
> >  src/mesa/drivers/dri/i965/genX_blorp_exec.c | 1113
> +-------------------------
> >  src/mesa/drivers/dri/i965/genX_blorp_exec.h | 1121
> +++++++++++++++++++++++++++
> >  4 files changed, 1133 insertions(+), 1118 deletions(-)
> >  create mode 100644 src/mesa/drivers/dri/i965/genX_blorp_exec.h
> >
> > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources
> b/src/mesa/drivers/dri/i965/Makefile.sources
> > index 5ea7b96..c97486c 100644
> > --- a/src/mesa/drivers/dri/i965/Makefile.sources
> > +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> > @@ -259,16 +259,21 @@ i965_FILES = \
> >       intel_upload.c
> >
> >  i965_gen6_FILES = \
> > -     genX_blorp_exec.c
> > +     genX_blorp_exec.c \
> > +     genX_blorp_exec.h
> >
> >  i965_gen7_FILES = \
> > -     genX_blorp_exec.c
> > +     genX_blorp_exec.c \
> > +     genX_blorp_exec.h
> >
> >  i965_gen75_FILES = \
> > -     genX_blorp_exec.c
> > +     genX_blorp_exec.c \
> > +     genX_blorp_exec.h
> >
> >  i965_gen8_FILES = \
> > -     genX_blorp_exec.c
> > +     genX_blorp_exec.c \
> > +     genX_blorp_exec.h
> >
> >  i965_gen9_FILES = \
> > -     genX_blorp_exec.c
> > +     genX_blorp_exec.c \
> > +     genX_blorp_exec.h
> > diff --git a/src/mesa/drivers/dri/i965/blorp_priv.h
> b/src/mesa/drivers/dri/i965/blorp_priv.h
> > index 977f54d..9b987a8 100644
> > --- a/src/mesa/drivers/dri/i965/blorp_priv.h
> > +++ b/src/mesa/drivers/dri/i965/blorp_priv.h
> > @@ -141,7 +141,7 @@ struct brw_blorp_prog_data
> >      */
> >     uint32_t flat_inputs;
> >     unsigned num_varying_inputs;
> > -   GLbitfield64 inputs_read;
> > +   uint64_t inputs_read;
> >  };
> >
> >  static inline unsigned
> > diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> > index 8c15b16..e07fa0a 100644
> > --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> > +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> > @@ -29,9 +29,7 @@
> >  #include "brw_context.h"
> >  #include "brw_state.h"
> >
> > -#include "blorp_priv.h"
> > -
> > -#include "genxml/gen_macros.h"
> > +#include "genX_blorp_exec.h"
> >
> >  static void *
> >  blorp_emit_dwords(struct blorp_context *blorp, void *batch, unsigned n)
> > @@ -168,1115 +166,6 @@ blorp_emit_3dstate_multisample(struct
> blorp_context *blorp, void *batch,
> >  #endif
> >  }
> >
> > -struct blorp_batch {
> > -   struct blorp_context *blorp;
> > -   void *batch;
> > -};
> > -
> > -#define __gen_address_type struct blorp_address
> > -#define __gen_user_data struct blorp_batch
> > -
> > -static uint64_t
> > -__gen_combine_address(struct blorp_batch *batch, void *location,
> > -                      struct blorp_address address, uint32_t delta)
> > -{
> > -   if (address.buffer == NULL) {
> > -      return address.offset + delta;
> > -   } else {
> > -      return blorp_emit_reloc(batch->blorp, batch->batch,
> > -                              location, address, delta);
> > -   }
> > -}
> > -
> > -#include "genxml/genX_pack.h"
> > -
> > -#define _blorp_cmd_length(cmd) cmd ## _length
> > -#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
> > -#define _blorp_cmd_header(cmd) cmd ## _header
> > -#define _blorp_cmd_pack(cmd) cmd ## _pack
> > -
> > -#define blorp_emit(batch, cmd, name)                        \
> > -   for (struct cmd name = { _blorp_cmd_header(cmd) },       \
> > -        *_dst = blorp_emit_dwords(batch.blorp, batch.batch, \
> > -                                  _blorp_cmd_length(cmd));  \
> > -        __builtin_expect(_dst != NULL, 1);                  \
> > -        _blorp_cmd_pack(cmd)(&batch, (void *)_dst, &name),   \
> > -        _dst = NULL)
> > -
> > -#define blorp_emitn(batch, cmd, n) ({
>  \
> > -      uint32_t *_dw = blorp_emit_dwords(batch.blorp, batch.batch, n);
>  \
> > -      struct cmd template = {
>  \
> > -         _blorp_cmd_header(cmd),
> \
> > -         .DWordLength = n - _blorp_cmd_length_bias(cmd),
> \
> > -      };
> \
> > -      _blorp_cmd_pack(cmd)(&batch, _dw, &template);
>  \
> > -      _dw + 1; /* Array starts at dw[1] */
> \
> > -   })
> > -
> > -/* Once vertex fetcher has written full VUE entries with complete
> > - * header the space requirement is as follows per vertex (in bytes):
> > - *
> > - *     Header    Position    Program constants
> > - *   +--------+------------+-------------------+
> > - *   |   16   |     16     |      n x 16       |
> > - *   +--------+------------+-------------------+
> > - *
> > - * where 'n' stands for number of varying inputs expressed as vec4s.
> > - *
> > - * The URB size is in turn expressed in 64 bytes (512 bits).
> > - */
> > -static inline unsigned
> > -gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params)
> > -{
> > -    const unsigned num_varyings =
> > -       params->wm_prog_data ? params->wm_prog_data->num_varying_inputs
> : 0;
> > -    const unsigned total_needed = 16 + 16 + num_varyings * 16;
> > -
> > -   return DIV_ROUND_UP(total_needed, 64);
> > -}
> > -
> > -/* 3DSTATE_URB
> > - * 3DSTATE_URB_VS
> > - * 3DSTATE_URB_HS
> > - * 3DSTATE_URB_DS
> > - * 3DSTATE_URB_GS
> > - *
> > - * Assign the entire URB to the VS. Even though the VS is disabled, URB
> space
> > - * is still needed because the clipper loads the VUE's from the URB.
> From
> > - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
> > - * Dword 1.15:0 "VS Number of URB Entries":
> > - *     This field is always used (even if VS Function Enable is
> DISABLED).
> > - *
> > - * The warning below appears in the PRM (Section 3DSTATE_URB), but we
> can
> > - * safely ignore it because this batch contains only one draw call.
> > - *     Because of URB corruption caused by allocating a previous GS unit
> > - *     URB entry to the VS unit, software is required to send a "GS
> NULL
> > - *     Fence" (Send URB fence with VS URB size == 1 and GS URB size
> == 0)
> > - *     plus a dummy DRAW call before any case where VS will be taking
> over
> > - *     GS URB space.
>
> You are dropping all the documentation above.
>

Yes, that was a rebase fail.  I've fixed it locally.


> > - *
> > - * If the 3DSTATE_URB_VS is emitted, then the others must be also.
> > - * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1
> 3DSTATE_URB_VS:
> > - *
> > - *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
> > - *     programmed in order for the programming of this state to be
> > - *     valid.
> > - */
> > -static void
> > -emit_urb_config(struct blorp_batch batch,
> > -                const struct brw_blorp_params *params)
> > -{
> > -   blorp_emit_urb_config(batch.blorp, batch.batch,
> > -                         gen7_blorp_get_vs_entry_size(params));
> > -}
> > -
> > -static void
> > -blorp_emit_vertex_data(struct blorp_batch batch,
> > -                       const struct brw_blorp_params *params,
> > -                       struct blorp_address *addr,
> > -                       uint32_t *size)
> > -{
> > -   const float vertices[] = {
> > -      /* v0 */ (float)params->x0, (float)params->y1,
> > -      /* v1 */ (float)params->x1, (float)params->y1,
> > -      /* v2 */ (float)params->x0, (float)params->y0,
> > -   };
> > -
> > -   void *data = blorp_alloc_vertex_buffer(batch.blorp,
> sizeof(vertices), addr);
> > -   memcpy(data, vertices, sizeof(vertices));
> > -   *size = sizeof(vertices);
> > -}
> > -
> > -static void
> > -blorp_emit_input_varying_data(struct blorp_batch batch,
> > -                              const struct brw_blorp_params *params,
> > -                              struct blorp_address *addr,
> > -                              uint32_t *size)
> > -{
> > -   const unsigned vec4_size_in_bytes = 4 * sizeof(float);
> > -   const unsigned max_num_varyings =
> > -      DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
> > -   const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
> > -
> > -   *size = num_varyings * vec4_size_in_bytes;
> > -
> > -   const float *const inputs_src = (const float *)&params->wm_inputs;
> > -   float *inputs = blorp_alloc_vertex_buffer(batch.blorp, *size, addr);
> > -
> > -   /* Walk over the attribute slots, determine if the attribute is used
> by
> > -    * the program and when necessary copy the values from the input
> storage to
> > -    * the vertex data buffer.
> > -    */
> > -   for (unsigned i = 0; i < max_num_varyings; i++) {
> > -      const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
> > -
> > -      if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
> > -         continue;
> > -
> > -      memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
> > -
> > -      inputs += 4;
> > -   }
> > -}
> > -
> > -static void
> > -blorp_emit_vertex_buffers(struct blorp_batch batch,
> > -                          const struct brw_blorp_params *params)
> > -{
> > -   struct GENX(VERTEX_BUFFER_STATE) vb[2];
> > -   memset(vb, 0, sizeof(vb));
> > -
> > -   unsigned num_buffers = 1;
> > -
> > -   uint32_t size;
> > -   blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress,
> &size);
> > -   vb[0].VertexBufferIndex = 0;
> > -   vb[0].BufferPitch = 2 * sizeof(float);
> > -   vb[0].VertexBufferMOCS = batch.blorp->mocs.vb;
> > -#if GEN_GEN >= 7
> > -   vb[0].AddressModifyEnable = true;
> > -#endif
> > -#if GEN_GEN >= 8
> > -   vb[0].BufferSize = size;
> > -#else
> > -   vb[0].BufferAccessType = VERTEXDATA;
> > -   vb[0].EndAddress = vb[0].BufferStartingAddress;
> > -   vb[0].EndAddress.offset += size - 1;
> > -#endif
> > -
> > -   if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs)
> {
> > -      blorp_emit_input_varying_data(batch, params,
> > -                                    &vb[1].BufferStartingAddress,
> &size);
> > -      vb[1].VertexBufferIndex = 1;
> > -      vb[1].BufferPitch = 0;
> > -      vb[1].VertexBufferMOCS = batch.blorp->mocs.vb;
> > -#if GEN_GEN >= 7
> > -      vb[1].AddressModifyEnable = true;
> > -#endif
> > -#if GEN_GEN >= 8
> > -      vb[1].BufferSize = size;
> > -#else
> > -      vb[1].BufferAccessType = INSTANCEDATA;
> > -      vb[1].EndAddress = vb[1].BufferStartingAddress;
> > -      vb[1].EndAddress.offset += size - 1;
> > -#endif
> > -      num_buffers++;
> > -   }
> > -
> > -   const unsigned num_dwords =
> > -      1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
> > -   uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS),
> num_dwords);
> > -
> > -   for (unsigned i = 0; i < num_buffers; i++) {
> > -      GENX(VERTEX_BUFFER_STATE_pack)(&batch, dw, &vb[i]);
> > -      dw += GENX(VERTEX_BUFFER_STATE_length);
> > -   }
> > -}
> > -
> > -static void
> > -blorp_emit_vertex_elements(struct blorp_batch batch,
> > -                           const struct brw_blorp_params *params)
> > -{
> > -   const unsigned num_varyings =
> > -      params->wm_prog_data ? params->wm_prog_data->num_varying_inputs
> : 0;
> > -   const unsigned num_elements = 2 + num_varyings;
> > -
> > -   struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
> > -   memset(ve, 0, num_elements * sizeof(*ve));
> > -
> > -   /* Setup VBO for the rectangle primitive.
> > -    *
> > -    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
> > -    * vertices. The vertices reside in screen space with DirectX
> > -    * coordinates (that is, (0, 0) is the upper left corner).
> > -    *
> > -    *   v2 ------ implied
> > -    *    |        |
> > -    *    |        |
> > -    *   v0 ----- v1
> > -    *
> > -    * Since the VS is disabled, the clipper loads each VUE directly from
> > -    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
> > -    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as
> follows:
> > -    *   dw0: Reserved, MBZ.
> > -    *   dw1: Render Target Array Index. The HiZ op does not use indexed
> > -    *        vertices, so set the dword to 0.
> > -    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
> > -    *        scissoring, so set the dword to 0.
> > -    *   dw3: Point Width: The HiZ op does not emit the POINTLIST
> primitive,
> > -    *        so set the dword to 0.
> > -    *   dw4: Vertex Position X.
> > -    *   dw5: Vertex Position Y.
> > -    *   dw6: Vertex Position Z.
> > -    *   dw7: Vertex Position W.
> > -    *
> > -    *   dw8: Flat vertex input 0
> > -    *   dw9: Flat vertex input 1
> > -    *   ...
> > -    *   dwn: Flat vertex input n - 8
> > -    *
> > -    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section
> 1.5.1
> > -    * "Vertex URB Entry (VUE) Formats".
> > -    *
> > -    * Only vertex position X and Y are going to be variable, Z is fixed
> to
> > -    * zero and W to one. Header words dw0-3 are all zero. There is no
> need to
> > -    * include the fixed values in the vertex buffer. Vertex fetcher can
> be
> > -    * instructed to fill vertex elements with constant values of one
> and zero
> > -    * instead of reading them from the buffer.
> > -    * Flat inputs are program constants that are not interpolated.
> Moreover
> > -    * their values will be the same between vertices.
> > -    *
> > -    * See the vertex element setup below.
> > -    */
> > -   ve[0].VertexBufferIndex = 0;
> > -   ve[0].Valid = true;
> > -   ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
> > -   ve[0].SourceElementOffset = 0;
> > -   ve[0].Component0Control = VFCOMP_STORE_0;
> > -   ve[0].Component1Control = VFCOMP_STORE_0;
> > -   ve[0].Component2Control = VFCOMP_STORE_0;
> > -   ve[0].Component3Control = VFCOMP_STORE_0;
> > -
> > -   ve[1].VertexBufferIndex = 0;
> > -   ve[1].Valid = true;
> > -   ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
> > -   ve[1].SourceElementOffset = 0;
> > -   ve[1].Component0Control = VFCOMP_STORE_SRC;
> > -   ve[1].Component1Control = VFCOMP_STORE_SRC;
> > -   ve[1].Component2Control = VFCOMP_STORE_0;
> > -   ve[1].Component3Control = VFCOMP_STORE_1_FP;
> > -
> > -   for (unsigned i = 0; i < num_varyings; ++i) {
> > -      ve[i + 2].VertexBufferIndex = 1;
> > -      ve[i + 2].Valid = true;
> > -      ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
> > -      ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
> > -      ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
> > -      ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
> > -      ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
> > -      ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
> > -   }
> > -
> > -   const unsigned num_dwords =
> > -      1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
> > -   uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS),
> num_dwords);
> > -
> > -   for (unsigned i = 0; i < num_elements; i++) {
> > -      GENX(VERTEX_ELEMENT_STATE_pack)(&batch, dw, &ve[i]);
> > -      dw += GENX(VERTEX_ELEMENT_STATE_length);
> > -   }
> > -
> > -#if GEN_GEN >= 8
> > -   blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
> > -
> > -   for (unsigned i = 0; i < num_elements; i++) {
> > -      blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {
> > -         vf.VertexElementIndex = i;
> > -         vf.InstancingEnable = false;
> > -      }
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
> > -      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
> > -   }
> > -#endif
> > -}
> > -
> > -static void
> > -blorp_emit_sf_config(struct blorp_batch batch,
> > -                     const struct brw_blorp_params *params)
> > -{
> > -   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
> > -
> > -   /* 3DSTATE_SF
> > -    *
> > -    * Disable ViewportTransformEnable (dw2.1)
> > -    *
> > -    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
> > -    * Primitives Overview":
> > -    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical
> with the
> > -    *     use of screen- space coordinates).
> > -    *
> > -    * A solid rectangle must be rendered, so set FrontFaceFillMode
> (dw2.4:3)
> > -    * and BackFaceFillMode (dw2.5:6) to SOLID(0).
> > -    *
> > -    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
> > -    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
> > -    *     SOLID: Any triangle or rectangle object found to be
> front-facing
> > -    *     is rendered as a solid object. This setting is required when
> > -    *     rendering rectangle (RECTLIST) objects.
> > -    */
>
> And all this.
>
> > -
> > -#if GEN_GEN >= 8
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_SF), sf);
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
> > -      raster.CullMode = CULLMODE_NONE;
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
> > -      sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
> > -      sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
> > -      sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
> > -      sbe.ForceVertexURBEntryReadLength = true;
> > -      sbe.ForceVertexURBEntryReadOffset = true;
> > -      sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
> > -
> > -#if GEN_GEN >= 9
> > -      for (unsigned i = 0; i < 32; i++)
> > -         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
> > -#endif
> > -   }
> > -
> > -#elif GEN_GEN >= 7
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_SF), sf) {
> > -      sf.FrontFaceFillMode = FILL_MODE_SOLID;
> > -      sf.BackFaceFillMode = FILL_MODE_SOLID;
> > -
> > -      sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
> > -         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
> > -
> > -#if GEN_GEN == 7
> > -      sf.DepthBufferSurfaceFormat = params->depth_format;
> > -#endif
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
> > -      sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
> > -      if (prog_data) {
> > -         sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
> > -         sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
> > -         sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
> > -      } else {
> > -         sbe.NumberofSFOutputAttributes = 0;
> > -         sbe.VertexURBEntryReadLength = 1;
> > -      }
> > -   }
> > -
> > -#else /* GEN_GEN <= 6 */
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_SF), sf) {
> > -      sf.FrontFaceFillMode = FILL_MODE_SOLID;
> > -      sf.BackFaceFillMode = FILL_MODE_SOLID;
> > -
> > -      sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
> > -         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
> > -
> > -      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
> > -      if (prog_data) {
> > -         sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
> > -         sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
> > -         sf.ConstantInterpolationEnable = prog_data->flat_inputs;
> > -      } else {
> > -         sf.NumberofSFOutputAttributes = 0;
> > -         sf.VertexURBEntryReadLength = 1;
> > -      }
> > -   }
> > -
> > -#endif /* GEN_GEN */
> > -}
> > -
> > -static void
> > -blorp_emit_ps_config(struct blorp_batch batch,
> > -                     const struct brw_blorp_params *params)
> > -{
> > -   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
> > -
> > -   /* Even when thread dispatch is disabled, max threads (dw5.25:31)
> must be
> > -    * nonzero to prevent the GPU from hanging.  While the documentation
> doesn't
> > -    * mention this explicitly, it notes that the valid range for the
> field is
> > -    * [1,39] = [2,40] threads, which excludes zero.
> > -    *
> > -    * To be safe (and to minimize extraneous code) we go ahead and fully
> > -    * configure the WM state whether or not there is a WM program.
> > -    */
>
> And here.
>
> > -
> > -#if GEN_GEN >= 8
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_WM), wm);
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_PS), ps) {
> > -      if (params->src.addr.buffer) {
> > -         ps.SamplerCount = 1; /* Up to 4 samplers */
> > -         ps.BindingTableEntryCount = 2;
> > -      } else {
> > -         ps.BindingTableEntryCount = 1;
> > -      }
> > -
> > -      ps.DispatchGRFStartRegisterForConstantSetupData0 =
> > -         prog_data->first_curbe_grf_0;
> > -      ps.DispatchGRFStartRegisterForConstantSetupData2 =
> > -         prog_data->first_curbe_grf_2;
> > -
> > -      ps._8PixelDispatchEnable = prog_data->dispatch_8;
> > -      ps._16PixelDispatchEnable = prog_data->dispatch_16;
> > -
> > -      ps.KernelStartPointer0 = params->wm_prog_kernel;
> > -      ps.KernelStartPointer2 =
> > -         params->wm_prog_kernel + prog_data->ksp_offset_2;
> > -
> > -      /* 3DSTATE_PS expects the number of threads per PSD, which is
> always 64;
> > -       * it implicitly scales for different GT levels (which have some
> # of
> > -       * PSDs).
> > -       *
> > -       * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
> > -       */
> > -      if (GEN_GEN >= 9)
> > -         ps.MaximumNumberofThreadsPerPSD = 64 - 1;
> > -      else
> > -         ps.MaximumNumberofThreadsPerPSD = 64 - 2;
> > -
> > -      switch (params->fast_clear_op) {
> > -#if GEN_GEN >= 9
> > -      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
> > -         ps.RenderTargetResolveType = RESOLVE_PARTIAL;
> > -         break;
> > -      case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */
> > -         ps.RenderTargetResolveType = RESOLVE_FULL;
> > -         break;
> > -#else
> > -      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
> > -         ps.RenderTargetResolveEnable = true;
> > -         break;
> > -#endif
> > -      case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
> > -         ps.RenderTargetFastClearEnable = true;
> > -         break;
> > -      }
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
> > -      psx.PixelShaderValid = true;
> > -
> > -      if (params->src.addr.buffer)
> > -         psx.PixelShaderKillsPixel = true;
> > -
> > -      psx.AttributeEnable = prog_data->num_varying_inputs > 0;
> > -
> > -      if (prog_data && prog_data->persample_msaa_dispatch)
> > -         psx.PixelShaderIsPerSample = true;
> > -   }
> > -
> > -#elif GEN_GEN >= 7
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_WM), wm) {
> > -      switch (params->hiz_op) {
> > -      case GEN6_HIZ_OP_DEPTH_CLEAR:
> > -         wm.DepthBufferClear = true;
> > -         break;
> > -      case GEN6_HIZ_OP_DEPTH_RESOLVE:
> > -         wm.DepthBufferResolveEnable = true;
> > -         break;
> > -      case GEN6_HIZ_OP_HIZ_RESOLVE:
> > -         wm.HierarchicalDepthBufferResolveEnable = true;
> > -         break;
> > -      case GEN6_HIZ_OP_NONE:
> > -         break;
> > -      default:
> > -         unreachable("not reached");
> > -      }
> > -
> > -      if (prog_data)
> > -         wm.ThreadDispatchEnable = true;
> > -
> > -      if (params->src.addr.buffer)
> > -         wm.PixelShaderKillPixel = true;
> > -
> > -      if (params->dst.surf.samples > 1) {
> > -         wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
> > -         wm.MultisampleDispatchMode =
> > -            (prog_data && prog_data->persample_msaa_dispatch) ?
> > -            MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
> > -      } else {
> > -         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
> > -         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
> > -      }
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_PS), ps) {
> > -      ps.MaximumNumberofThreads = batch.blorp->isl_dev->info->max_wm_threads
> - 1;
> > -
> > -#if GEN_IS_HASWELL
> > -      ps.SampleMask = 1;
> > -#endif
> > -
> > -      if (prog_data) {
> > -         ps.DispatchGRFStartRegisterforConstantSetupData0 =
> > -            prog_data->first_curbe_grf_0;
> > -         ps.DispatchGRFStartRegisterforConstantSetupData2 =
> > -            prog_data->first_curbe_grf_2;
> > -
> > -         ps.KernelStartPointer0 = params->wm_prog_kernel;
> > -         ps.KernelStartPointer2 =
> > -            params->wm_prog_kernel + prog_data->ksp_offset_2;
> > -
> > -         ps._8PixelDispatchEnable = prog_data->dispatch_8;
> > -         ps._16PixelDispatchEnable = prog_data->dispatch_16;
> > -
> > -         ps.AttributeEnable = prog_data->num_varying_inputs > 0;
> > -      } else {
> > -         /* Gen7 hardware gets angry if we don't enable at least one
> dispatch
> > -          * mode, so just enable 16-pixel dispatch if we don't have a
> program.
> > -          */
> > -         ps._16PixelDispatchEnable = true;
> > -      }
> > -
> > -      if (params->src.addr.buffer)
> > -         ps.SamplerCount = 1; /* Up to 4 samplers */
> > -
> > -      switch (params->fast_clear_op) {
> > -      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
> > -         ps.RenderTargetResolveEnable = true;
> > -         break;
> > -      case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
> > -         ps.RenderTargetFastClearEnable = true;
> > -         break;
> > -      }
> > -   }
> > -
> > -#else /* GEN_GEN <= 6 */
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_WM), wm) {
> > -      wm.MaximumNumberofThreads = batch.blorp->isl_dev->info->max_wm_threads
> - 1;
> > -
> > -      switch (params->hiz_op) {
> > -      case GEN6_HIZ_OP_DEPTH_CLEAR:
> > -         wm.DepthBufferClear = true;
> > -         break;
> > -      case GEN6_HIZ_OP_DEPTH_RESOLVE:
> > -         wm.DepthBufferResolveEnable = true;
> > -         break;
> > -      case GEN6_HIZ_OP_HIZ_RESOLVE:
> > -         wm.HierarchicalDepthBufferResolveEnable = true;
> > -         break;
> > -      case GEN6_HIZ_OP_NONE:
> > -         break;
> > -      default:
> > -         unreachable("not reached");
> > -      }
> > -
> > -      if (prog_data) {
> > -         wm.ThreadDispatchEnable = true;
> > -
> > -         wm.DispatchGRFStartRegisterforConstantSetupData0 =
> > -            prog_data->first_curbe_grf_0;
> > -         wm.DispatchGRFStartRegisterforConstantSetupData2 =
> > -            prog_data->first_curbe_grf_2;
> > -
> > -         wm.KernelStartPointer0 = params->wm_prog_kernel;
> > -         wm.KernelStartPointer2 =
> > -            params->wm_prog_kernel + prog_data->ksp_offset_2;
> > -
> > -         wm._8PixelDispatchEnable = prog_data->dispatch_8;
> > -         wm._16PixelDispatchEnable = prog_data->dispatch_16;
> > -
> > -         wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
> > -      }
> > -
> > -      if (params->src.addr.buffer) {
> > -         wm.SamplerCount = 1; /* Up to 4 samplers */
> > -         wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on
> */
> > -      }
> > -
> > -      if (params->dst.surf.samples > 1) {
> > -         wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
> > -         wm.MultisampleDispatchMode =
> > -            (prog_data && prog_data->persample_msaa_dispatch) ?
> > -            MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
> > -      } else {
> > -         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
> > -         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
> > -      }
> > -   }
> > -
> > -#endif /* GEN_GEN */
> > -}
> > -
> > -
> > -static void
> > -blorp_emit_depth_stencil_config(struct blorp_batch batch,
> > -                                const struct brw_blorp_params *params)
> > -{
> > -#if GEN_GEN >= 7
> > -   const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
> > -#else
> > -   const uint32_t mocs = 0;
> > -#endif
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
> > -      switch (params->depth.surf.dim) {
> > -      case ISL_SURF_DIM_1D:
> > -         db.SurfaceType = SURFTYPE_1D;
> > -         break;
> > -      case ISL_SURF_DIM_2D:
> > -         db.SurfaceType = SURFTYPE_2D;
> > -         break;
> > -      case ISL_SURF_DIM_3D:
> > -         db.SurfaceType = SURFTYPE_3D;
> > -         break;
> > -      }
> > -
> > -      db.SurfaceFormat = params->depth_format;
> > -
> > -#if GEN_GEN >= 7
> > -      db.DepthWriteEnable = true;
> > -#endif
> > -
> > -#if GEN_GEN <= 6
> > -      db.TiledSurface = true;
> > -      db.TileWalk = TILEWALK_YMAJOR;
> > -      db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
> > -      db.SeparateStencilBufferEnable = true;
> > -#endif
> > -
> > -      db.HierarchicalDepthBufferEnable = true;
> > -
> > -      db.Width = params->depth.surf.logical_level0_px.width - 1;
> > -      db.Height = params->depth.surf.logical_level0_px.height - 1;
> > -      db.RenderTargetViewExtent = db.Depth =
> > -         MAX2(params->depth.surf.logical_level0_px.depth,
> > -              params->depth.surf.logical_level0_px.array_len) - 1;
> > -
> > -      db.LOD = params->depth.view.base_level;
> > -      db.MinimumArrayElement = params->depth.view.base_array_layer;
> > -
> > -      db.SurfacePitch = params->depth.surf.row_pitch - 1;
> > -      db.SurfaceBaseAddress = params->depth.addr;
> > -      db.DepthBufferMOCS = mocs;
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
> > -      hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
> > -      hiz.SurfaceBaseAddress = params->depth.aux_addr;
> > -      hiz.HierarchicalDepthBufferMOCS = mocs;
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
> > -}
> > -
> > -static uint32_t
> > -blorp_emit_blend_state(struct blorp_batch batch,
> > -                       const struct brw_blorp_params *params)
> > -{
> > -   struct GENX(BLEND_STATE) blend;
> > -   memset(&blend, 0, sizeof(blend));
> > -
> > -   for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
> > -      blend.Entry[i].PreBlendColorClampEnable = true;
> > -      blend.Entry[i].PostBlendColorClampEnable = true;
> > -      blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
> > -
> > -      blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
> > -      blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]
> ;
> > -      blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
> > -      blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]
> ;
> > -   }
> > -
> > -   uint32_t offset;
> > -   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > -                                           AUB_TRACE_BLEND_STATE,
> > -                                           GENX(BLEND_STATE_length) * 4,
> > -                                           64, &offset);
> > -   GENX(BLEND_STATE_pack)(NULL, state, &blend);
> > -
> > -#if GEN_GEN >= 7
> > -   blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
> > -      sp.BlendStatePointer = offset;
> > -#if GEN_GEN >= 8
> > -      sp.BlendStatePointerValid = true;
> > -#endif
> > -   }
> > -#endif
> > -
> > -#if GEN_GEN >= 8
> > -   blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
> > -      ps_blend.HasWriteableRT = true;
> > -   }
> > -#endif
> > -
> > -   return offset;
> > -}
> > -
> > -static uint32_t
> > -blorp_emit_color_calc_state(struct blorp_batch batch,
> > -                            const struct brw_blorp_params *params)
> > -{
> > -   uint32_t offset;
> > -   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > -                                           AUB_TRACE_CC_STATE,
> > -
>  GENX(COLOR_CALC_STATE_length) * 4,
> > -                                           64, &offset);
> > -   memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
> > -
> > -#if GEN_GEN >= 7
> > -   blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
> > -      sp.ColorCalcStatePointer = offset;
> > -#if GEN_GEN >= 8
> > -      sp.ColorCalcStatePointerValid = true;
> > -#endif
> > -   }
> > -#endif
> > -
> > -   return offset;
> > -}
> > -
> > -static uint32_t
> > -blorp_emit_depth_stencil_state(struct blorp_batch batch,
> > -                               const struct brw_blorp_params *params)
> > -{
> > -#if GEN_GEN >= 8
> > -
> > -   /* On gen8+, DEPTH_STENCIL state is simply an instruction */
> > -   blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
> > -   return 0;
> > -
> > -#else /* GEN_GEN <= 7 */
> > -
> > -   /* See the following sections of the Sandy Bridge PRM, Volume 1,
> Part2:
> > -    *   - 7.5.3.1 Depth Buffer Clear
> > -    *   - 7.5.3.2 Depth Buffer Resolve
> > -    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
> > -    */
> > -   struct GENX(DEPTH_STENCIL_STATE) ds = {
> > -      .DepthBufferWriteEnable = true,
> > -   };
> > -
> > -   if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
> > -      ds.DepthTestEnable = true;
> > -      ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
> > -   }
> > -
> > -   uint32_t offset;
> > -   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > -
>  AUB_TRACE_DEPTH_STENCIL_STATE,
> > -                                           GENX(DEPTH_STENCIL_STATE_length)
> * 4,
> > -                                           64, &offset);
> > -   GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
> > -
> > -#if GEN_GEN >= 7
> > -   blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
> > -      sp.PointertoDEPTH_STENCIL_STATE = offset;
> > -   }
> > -#endif
> > -
> > -   return offset;
> > -
> > -#endif /* GEN_GEN */
> > -}
> > -
> > -struct surface_state_info {
> > -   unsigned num_dwords;
> > -   unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in
> bytes */
> > -   unsigned reloc_dw;
> > -   unsigned aux_reloc_dw;
> > -};
> > -
> > -static const struct surface_state_info surface_state_infos[] = {
> > -   [6] = {6,  32, 1,  0},
> > -   [7] = {8,  32, 1,  6},
> > -   [8] = {13, 64, 8,  10},
> > -   [9] = {16, 64, 8,  10},
> > -};
> > -
> > -static void
> > -blorp_emit_surface_state(struct blorp_context *blorp,
> > -                         const struct brw_blorp_surface_info *surface,
> > -                         uint32_t *state, uint32_t state_offset,
> > -                         bool is_render_target)
> > -{
> > -   const struct surface_state_info ss_info =
> surface_state_infos[GEN_GEN];
> > -
> > -   struct isl_surf surf = surface->surf;
> > -
> > -   if (surf.dim == ISL_SURF_DIM_1D &&
> > -       surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) {
> > -      assert(surf.logical_level0_px.height == 1);
> > -      surf.dim = ISL_SURF_DIM_2D;
> > -   }
> > -
> > -   /* Blorp doesn't support HiZ in any of the blit or slow-clear paths
> */
> > -   enum isl_aux_usage aux_usage = surface->aux_usage;
> > -   if (aux_usage == ISL_AUX_USAGE_HIZ)
> > -      aux_usage = ISL_AUX_USAGE_NONE;
> > -
> > -   const uint32_t mocs = is_render_target ? blorp->mocs.rb :
> blorp->mocs.tex;
> > -
> > -   isl_surf_fill_state(blorp->isl_dev, state,
> > -                       .surf = &surf, .view = &surface->view,
> > -                       .aux_surf = &surface->aux_surf, .aux_usage =
> aux_usage,
> > -                       .mocs = mocs, .clear_color =
> surface->clear_color,
> > -                       .x_offset_sa = surface->tile_x_sa,
> > -                       .y_offset_sa = surface->tile_y_sa);
> > -
> > -   blorp_surface_reloc(blorp, state_offset + ss_info.reloc_dw * 4,
> > -                       surface->addr, 0);
> > -
> > -   if (aux_usage != ISL_AUX_USAGE_NONE) {
> > -      /* On gen7 and prior, the bottom 12 bits of the MCS base address
> are
> > -       * used to store other information.  This should be ok, however,
> because
> > -       * surface buffer addresses are always 4K page alinged.
> > -       */
> > -      assert((surface->aux_addr.offset & 0xfff) == 0);
> > -      blorp_surface_reloc(blorp, state_offset + ss_info.aux_reloc_dw *
> 4,
> > -                          surface->aux_addr,
> state[ss_info.aux_reloc_dw]);
> > -   }
> > -}
> > -
> > -static void
> > -blorp_emit_surface_states(struct blorp_batch batch,
> > -                          const struct brw_blorp_params *params)
> > -{
> > -   uint32_t bind_offset, *bind_map;
> > -   void *surface_maps[2];
> > -
> > -   const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4;
> > -   const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ?
> 64 : 32;
> > -
> > -   unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL);
> > -   blorp_alloc_binding_table(batch.blorp, num_surfaces, ss_size,
> ss_align,
> > -                             &bind_offset, &bind_map, surface_maps);
> > -
> > -   blorp_emit_surface_state(batch.blorp, &params->dst,
> > -                            surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
> > -                            bind_map[BLORP_RENDERBUFFER_BT_INDEX],
> true);
> > -   if (params->src.addr.buffer) {
> > -      blorp_emit_surface_state(batch.blorp, &params->src,
> > -                               surface_maps[BLORP_TEXTURE_BT_INDEX],
> > -                               bind_map[BLORP_TEXTURE_BT_INDEX],
> false);
> > -   }
> > -
> > -#if GEN_GEN >= 7
> > -   blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
> > -      bt.PointertoPSBindingTable = bind_offset;
> > -   }
> > -#else
> > -   blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
> > -      bt.PSBindingTableChange = true;
> > -      bt.PointertoPSBindingTable = bind_offset;
> > -   }
> > -#endif
> > -}
> > -
> > -static void
> > -blorp_emit_sampler_state(struct blorp_batch batch,
> > -                         const struct brw_blorp_params *params)
> > -{
> > -   struct GENX(SAMPLER_STATE) sampler = {
> > -      .MipModeFilter = MIPFILTER_NONE,
> > -      .MagModeFilter = MAPFILTER_LINEAR,
> > -      .MinModeFilter = MAPFILTER_LINEAR,
> > -      .MinLOD = 0,
> > -      .MaxLOD = 0,
> > -      .TCXAddressControlMode = TCM_CLAMP,
> > -      .TCYAddressControlMode = TCM_CLAMP,
> > -      .TCZAddressControlMode = TCM_CLAMP,
> > -      .MaximumAnisotropy = RATIO21,
> > -      .RAddressMinFilterRoundingEnable = true,
> > -      .RAddressMagFilterRoundingEnable = true,
> > -      .VAddressMinFilterRoundingEnable = true,
> > -      .VAddressMagFilterRoundingEnable = true,
> > -      .UAddressMinFilterRoundingEnable = true,
> > -      .UAddressMagFilterRoundingEnable = true,
> > -      .NonnormalizedCoordinateEnable = true,
> > -   };
> > -
> > -   uint32_t offset;
> > -   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > -                                           AUB_TRACE_SAMPLER_STATE,
> > -                                           GENX(SAMPLER_STATE_length) *
> 4,
> > -                                           32, &offset);
> > -   GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
> > -
> > -#if GEN_GEN >= 7
> > -   blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
> > -      ssp.PointertoPSSamplerState = offset;
> > -   }
> > -#else
> > -   blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
> > -      ssp.VSSamplerStateChange = true;
> > -      ssp.GSSamplerStateChange = true;
> > -      ssp.PSSamplerStateChange = true;
> > -      ssp.PointertoPSSamplerState = offset;
> > -   }
> > -#endif
> > -}
> > -
> > -/* 3DSTATE_VIEWPORT_STATE_POINTERS */
> > -static void
> > -blorp_emit_viewport_state(struct blorp_batch batch,
> > -                          const struct brw_blorp_params *params)
> > -{
> > -   uint32_t cc_vp_offset;
> > -
> > -   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > -                                           AUB_TRACE_CC_VP_STATE,
> > -                                           GENX(CC_VIEWPORT_length) *
> 4, 32,
> > -                                           &cc_vp_offset);
> > -
> > -   GENX(CC_VIEWPORT_pack)(&batch, state,
> > -      &(struct GENX(CC_VIEWPORT)) {
> > -         .MinimumDepth = 0.0,
> > -         .MaximumDepth = 1.0,
> > -      });
> > -
> > -#if GEN_GEN >= 7
> > -   blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
> > -      vsp.CCViewportPointer = cc_vp_offset;
> > -   }
> > -#else
> > -   blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
> > -      vsp.CCViewportStateChange = true;
> > -      vsp.PointertoCC_VIEWPORT = cc_vp_offset;
> > -   }
> > -#endif
> > -}
> > -
> > -
> > -/**
> > - * \brief Execute a blit or render pass operation.
> > - *
> > - * To execute the operation, this function manually constructs and
> emits a
> > - * batch to draw a rectangle primitive. The batchbuffer is flushed
> before
> > - * constructing and after emitting the batch.
> > - *
> > - * This function alters no GL state.
> > - */
> > -static void
> > -blorp_exec(struct blorp_context *blorp, void *batch_data,
> > -           const struct brw_blorp_params *params)
> > -{
> > -   struct blorp_batch batch = {
> > -      .blorp = blorp,
> > -      .batch = batch_data,
> > -   };
> > -
> > -   uint32_t blend_state_offset = 0;
> > -   uint32_t color_calc_state_offset = 0;
> > -   uint32_t depth_stencil_state_offset;
> > -
> > -   blorp_emit_vertex_buffers(batch, params);
> > -   blorp_emit_vertex_elements(batch, params);
> > -
> > -   emit_urb_config(batch, params);
> > -
> > -   if (params->wm_prog_data) {
> > -      blend_state_offset = blorp_emit_blend_state(batch, params);
> > -      color_calc_state_offset = blorp_emit_color_calc_state(batch,
> params);
> > -   }
> > -   depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch,
> params);
> > -
> > -#if GEN_GEN <= 6
> > -   /* 3DSTATE_CC_STATE_POINTERS
> > -    *
> > -    * The pointer offsets are relative to
> > -    * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
> > -    *
> > -    * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
>
> Here also.
>
> > -    *
> > -    * The dynamic state emit helpers emit their own STATE_POINTERS
> packets on
> > -    * gen7+.  However, on gen6 and earlier, they're all lumpped
> together in
> > -    * one CC_STATE_POINTERS packet so we have to emit that here.
> > -    */
> > -   blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
> > -      cc.BLEND_STATEChange = true;
> > -      cc.COLOR_CALC_STATEChange = true;
> > -      cc.DEPTH_STENCIL_STATEChange = true;
> > -      cc.PointertoBLEND_STATE = blend_state_offset;
> > -      cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
> > -      cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
> > -   }
> > -#else
> > -   (void)blend_state_offset;
> > -   (void)color_calc_state_offset;
> > -   (void)depth_stencil_state_offset;
> > -#endif
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs);
> > -#if GEN_GEN >= 7
> > -   blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs);
> > -   blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS);
> > -#endif
> > -   blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs);
> > -   blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);
> > -
> > -   if (params->wm_prog_data)
> > -      blorp_emit_surface_states(batch, params);
> > -
> > -   if (params->src.addr.buffer)
> > -      blorp_emit_sampler_state(batch, params);
> > -
> > -   blorp_emit_3dstate_multisample(batch.blorp, batch.batch,
> > -                                  params->dst.surf.samples);
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) {
> > -      mask.SampleMask = (1 << params->dst.surf.samples) - 1;
> > -   }
> > -
> > -   /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
> > -    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
> > -    *
> > -    *   [DevSNB] A pipeline flush must be programmed prior to a
> > -    *   3DSTATE_VS command that causes the VS Function Enable to
> > -    *   toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
> > -    *   command with CS stall bit set and a post sync operation.
> > -    *
> > -    * We've already done one at the start of the BLORP operation.
>
> And here.
>
> > -    */
> > -   blorp_emit(batch, GENX(3DSTATE_VS), vs);
> > -#if GEN_GEN >= 7
> > -   blorp_emit(batch, GENX(3DSTATE_HS), hs);
> > -   blorp_emit(batch, GENX(3DSTATE_TE), te);
> > -   blorp_emit(batch, GENX(3DSTATE_DS), DS);
> > -   blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so);
> > -#endif
> > -   blorp_emit(batch, GENX(3DSTATE_GS), gs);
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_CLIP), clip) {
> > -      clip.PerspectiveDivideDisable = true;
> > -   }
> > -
> > -   blorp_emit_sf_config(batch, params);
> > -   blorp_emit_ps_config(batch, params);
> > -
> > -   blorp_emit_viewport_state(batch, params);
> > -
> > -   if (params->depth.addr.buffer) {
> > -      blorp_emit_depth_stencil_config(batch, params);
> > -   } else {
> > -      blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
> > -         db.SurfaceType = SURFTYPE_NULL;
> > -         db.SurfaceFormat = D32_FLOAT;
> > -      }
> > -      blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
> > -      blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
> > -   }
> > -
> > -   /* 3DSTATE_CLEAR_PARAMS
> > -    *
> > -    * From the Sandybridge PRM, Volume 2, Part 1, Section
> 3DSTATE_CLEAR_PARAMS:
> > -    *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the
> DEPTH_BUFFER_STATE
> > -    *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
>
> And finally here. Otherwise it looks like the code has just been moved.
>
> I would keep the documentation, and with that:
>
> Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
>
> > -    */
> > -   blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) {
> > -      clear.DepthClearValueValid = true;
> > -      clear.DepthClearValue = params->depth.clear_color.u32[0];
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
> > -      rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0)
> - 1;
> > -      rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0)
> - 1;
> > -   }
> > -
> > -   blorp_emit(batch, GENX(3DPRIMITIVE), prim) {
> > -      prim.VertexAccessType = SEQUENTIAL;
> > -      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
> > -      prim.VertexCountPerInstance = 3;
> > -      prim.InstanceCount = params->num_layers;
> > -   }
> > -}
> > -
> >  void
> >  genX(blorp_exec)(struct brw_context *brw,
> >                   const struct brw_blorp_params *params)
> > diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.h
> b/src/mesa/drivers/dri/i965/genX_blorp_exec.h
> > new file mode 100644
> > index 0000000..02a0397
> > --- /dev/null
> > +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.h
> > @@ -0,0 +1,1121 @@
> > +/*
> > + * Copyright © 2016 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "blorp_priv.h"
> > +#include "brw_device_info.h"
> > +#include "intel_aub.h"
> > +
> > +/**
> > + * This file provides the blorp pipeline setup and execution
> functionality.
> > + * It defines the following function:
> > + *
> > + * static void
> > + * blorp_exec(struct blorp_context *blorp, void *batch_data,
> > + *            const struct brw_blorp_params *params);
> > + *
> > + * It is the job of whoever includes this header to wrap this in
> something
> > + * to get an externally visible symbol.
> > + *
> > + * In order for the blorp_exec function to work, the driver must provide
> > + * implementations of the following static helper functions.
> > + */
> > +
> > +static void *
> > +blorp_emit_dwords(struct blorp_context *blorp, void *batch, unsigned n);
> > +
> > +static uint64_t
> > +blorp_emit_reloc(struct blorp_context *blorp, void *batch,
> > +                 void *location, struct blorp_address address, uint32_t
> delta);
> > +
> > +static void *
> > +blorp_alloc_dynamic_state(struct blorp_context *blorp,
> > +                          enum aub_state_struct_type type,
> > +                          uint32_t size,
> > +                          uint32_t alignment,
> > +                          uint32_t *offset);
> > +static void *
> > +blorp_alloc_vertex_buffer(struct blorp_context *blorp, uint32_t size,
> > +                          struct blorp_address *addr);
> > +
> > +static void
> > +blorp_alloc_binding_table(struct blorp_context *blorp, unsigned
> num_entries,
> > +                          unsigned state_size, unsigned state_alignment,
> > +                          uint32_t *bt_offset, uint32_t **bt_map,
> > +                          void **surface_maps);
> > +static void
> > +blorp_surface_reloc(struct blorp_context *blorp, uint32_t ss_offset,
> > +                    struct blorp_address address, uint32_t delta);
> > +
> > +static void
> > +blorp_emit_urb_config(struct blorp_context *blorp, void *batch,
> > +                      unsigned vs_entry_size);
> > +static void
> > +blorp_emit_3dstate_multisample(struct blorp_context *blorp, void
> *batch,
> > +                               unsigned samples);
> > +
> > +/***** BEGIN blorp_exec implementation ******/
> > +
> > +#include "genxml/gen_macros.h"
> > +
> > +struct blorp_batch {
> > +   struct blorp_context *blorp;
> > +   void *batch;
> > +};
> > +
> > +#define __gen_address_type struct blorp_address
> > +#define __gen_user_data struct blorp_batch
> > +
> > +static uint64_t
> > +__gen_combine_address(struct blorp_batch *batch, void *location,
> > +                      struct blorp_address address, uint32_t delta)
> > +{
> > +   if (address.buffer == NULL) {
> > +      return address.offset + delta;
> > +   } else {
> > +      return blorp_emit_reloc(batch->blorp, batch->batch,
> > +                              location, address, delta);
> > +   }
> > +}
> > +
> > +#include "genxml/genX_pack.h"
> > +
> > +#define _blorp_cmd_length(cmd) cmd ## _length
> > +#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
> > +#define _blorp_cmd_header(cmd) cmd ## _header
> > +#define _blorp_cmd_pack(cmd) cmd ## _pack
> > +
> > +#define blorp_emit(batch, cmd, name)                        \
> > +   for (struct cmd name = { _blorp_cmd_header(cmd) },       \
> > +        *_dst = blorp_emit_dwords(batch.blorp, batch.batch, \
> > +                                  _blorp_cmd_length(cmd));  \
> > +        __builtin_expect(_dst != NULL, 1);                  \
> > +        _blorp_cmd_pack(cmd)(&batch, (void *)_dst, &name),   \
> > +        _dst = NULL)
> > +
> > +#define blorp_emitn(batch, cmd, n) ({
>  \
> > +      uint32_t *_dw = blorp_emit_dwords(batch.blorp, batch.batch, n);
>  \
> > +      struct cmd template = {
>  \
> > +         _blorp_cmd_header(cmd),
> \
> > +         .DWordLength = n - _blorp_cmd_length_bias(cmd),
> \
> > +      };
> \
> > +      _blorp_cmd_pack(cmd)(&batch, _dw, &template);
>  \
> > +      _dw + 1; /* Array starts at dw[1] */
> \
> > +   })
> > +
> > +/* Once vertex fetcher has written full VUE entries with complete
> > + * header the space requirement is as follows per vertex (in bytes):
> > + *
> > + *     Header    Position    Program constants
> > + *   +--------+------------+-------------------+
> > + *   |   16   |     16     |      n x 16       |
> > + *   +--------+------------+-------------------+
> > + *
> > + * where 'n' stands for number of varying inputs expressed as vec4s.
> > + *
> > + * The URB size is in turn expressed in 64 bytes (512 bits).
> > + */
> > +static inline unsigned
> > +gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params)
> > +{
> > +    const unsigned num_varyings =
> > +       params->wm_prog_data ? params->wm_prog_data->num_varying_inputs
> : 0;
> > +    const unsigned total_needed = 16 + 16 + num_varyings * 16;
> > +
> > +   return DIV_ROUND_UP(total_needed, 64);
> > +}
> > +
> > +/* 3DSTATE_URB_VS
> > + * 3DSTATE_URB_HS
> > + * 3DSTATE_URB_DS
> > + * 3DSTATE_URB_GS
> > + *
> > + * If the 3DSTATE_URB_VS is emitted, than the others must be also.
> > + * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1
> 3DSTATE_URB_VS:
> > + *
> > + *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
> > + *     programmed in order for the programming of this state to be
> > + *     valid.
> > + */
> > +static void
> > +emit_urb_config(struct blorp_batch batch,
> > +                const struct brw_blorp_params *params)
> > +{
> > +   blorp_emit_urb_config(batch.blorp, batch.batch,
> > +                         gen7_blorp_get_vs_entry_size(params));
> > +}
> > +
> > +static void
> > +blorp_emit_vertex_data(struct blorp_batch batch,
> > +                       const struct brw_blorp_params *params,
> > +                       struct blorp_address *addr,
> > +                       uint32_t *size)
> > +{
> > +   const float vertices[] = {
> > +      /* v0 */ (float)params->x0, (float)params->y1,
> > +      /* v1 */ (float)params->x1, (float)params->y1,
> > +      /* v2 */ (float)params->x0, (float)params->y0,
> > +   };
> > +
> > +   void *data = blorp_alloc_vertex_buffer(batch.blorp,
> sizeof(vertices), addr);
> > +   memcpy(data, vertices, sizeof(vertices));
> > +   *size = sizeof(vertices);
> > +}
> > +
> > +static void
> > +blorp_emit_input_varying_data(struct blorp_batch batch,
> > +                              const struct brw_blorp_params *params,
> > +                              struct blorp_address *addr,
> > +                              uint32_t *size)
> > +{
> > +   const unsigned vec4_size_in_bytes = 4 * sizeof(float);
> > +   const unsigned max_num_varyings =
> > +      DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
> > +   const unsigned num_varyings = params->wm_prog_data->num_
> varying_inputs;
> > +
> > +   *size = num_varyings * vec4_size_in_bytes;
> > +
> > +   const float *const inputs_src = (const float *)&params->wm_inputs;
> > +   float *inputs = blorp_alloc_vertex_buffer(batch.blorp, *size, addr);
> > +
> > +   /* Walk over the attribute slots, determine if the attribute is used
> by
> > +    * the program and when necessary copy the values from the input
> storage to
> > +    * the vertex data buffer.
> > +    */
> > +   for (unsigned i = 0; i < max_num_varyings; i++) {
> > +      const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
> > +
> > +      if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
> > +         continue;
> > +
> > +      memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
> > +
> > +      inputs += 4;
> > +   }
> > +}
> > +
> > +static void
> > +blorp_emit_vertex_buffers(struct blorp_batch batch,
> > +                          const struct brw_blorp_params *params)
> > +{
> > +   struct GENX(VERTEX_BUFFER_STATE) vb[2];
> > +   memset(vb, 0, sizeof(vb));
> > +
> > +   unsigned num_buffers = 1;
> > +
> > +   uint32_t size;
> > +   blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress,
> &size);
> > +   vb[0].VertexBufferIndex = 0;
> > +   vb[0].BufferPitch = 2 * sizeof(float);
> > +   vb[0].VertexBufferMOCS = batch.blorp->mocs.vb;
> > +#if GEN_GEN >= 7
> > +   vb[0].AddressModifyEnable = true;
> > +#endif
> > +#if GEN_GEN >= 8
> > +   vb[0].BufferSize = size;
> > +#else
> > +   vb[0].BufferAccessType = VERTEXDATA;
> > +   vb[0].EndAddress = vb[0].BufferStartingAddress;
> > +   vb[0].EndAddress.offset += size - 1;
> > +#endif
> > +
> > +   if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs)
> {
> > +      blorp_emit_input_varying_data(batch, params,
> > +                                    &vb[1].BufferStartingAddress,
> &size);
> > +      vb[1].VertexBufferIndex = 1;
> > +      vb[1].BufferPitch = 0;
> > +      vb[1].VertexBufferMOCS = batch.blorp->mocs.vb;
> > +#if GEN_GEN >= 7
> > +      vb[1].AddressModifyEnable = true;
> > +#endif
> > +#if GEN_GEN >= 8
> > +      vb[1].BufferSize = size;
> > +#else
> > +      vb[1].BufferAccessType = INSTANCEDATA;
> > +      vb[1].EndAddress = vb[1].BufferStartingAddress;
> > +      vb[1].EndAddress.offset += size - 1;
> > +#endif
> > +      num_buffers++;
> > +   }
> > +
> > +   const unsigned num_dwords =
> > +      1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
> > +   uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS),
> num_dwords);
> > +
> > +   for (unsigned i = 0; i < num_buffers; i++) {
> > +      GENX(VERTEX_BUFFER_STATE_pack)(&batch, dw, &vb[i]);
> > +      dw += GENX(VERTEX_BUFFER_STATE_length);
> > +   }
> > +}
> > +
> > +static void
> > +blorp_emit_vertex_elements(struct blorp_batch batch,
> > +                           const struct brw_blorp_params *params)
> > +{
> > +   const unsigned num_varyings =
> > +      params->wm_prog_data ? params->wm_prog_data->num_varying_inputs
> : 0;
> > +   const unsigned num_elements = 2 + num_varyings;
> > +
> > +   struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
> > +   memset(ve, 0, num_elements * sizeof(*ve));
> > +
> > +   /* Setup VBO for the rectangle primitive..
> > +    *
> > +    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
> > +    * vertices. The vertices reside in screen space with DirectX
> > +    * coordinates (that is, (0, 0) is the upper left corner).
> > +    *
> > +    *   v2 ------ implied
> > +    *    |        |
> > +    *    |        |
> > +    *   v0 ----- v1
> > +    *
> > +    * Since the VS is disabled, the clipper loads each VUE directly from
> > +    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
> > +    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as
> follows:
> > +    *   dw0: Reserved, MBZ.
> > +    *   dw1: Render Target Array Index. The HiZ op does not use indexed
> > +    *        vertices, so set the dword to 0.
> > +    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
> > +    *        scissoring, so set the dword to 0.
> > +    *   dw3: Point Width: The HiZ op does not emit the POINTLIST
> primitive,
> > +    *        so set the dword to 0.
> > +    *   dw4: Vertex Position X.
> > +    *   dw5: Vertex Position Y.
> > +    *   dw6: Vertex Position Z.
> > +    *   dw7: Vertex Position W.
> > +    *
> > +    *   dw8: Flat vertex input 0
> > +    *   dw9: Flat vertex input 1
> > +    *   ...
> > +    *   dwn: Flat vertex input n - 8
> > +    *
> > +    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section
> 1.5.1
> > +    * "Vertex URB Entry (VUE) Formats".
> > +    *
> > +    * Only vertex position X and Y are going to be variable, Z is fixed
> to
> > +    * zero and W to one. Header words dw0-3 are all zero. There is no
> need to
> > +    * include the fixed values in the vertex buffer. Vertex fetcher can
> be
> > +    * instructed to fill vertex elements with constant values of one
> and zero
> > +    * instead of reading them from the buffer.
> > +    * Flat inputs are program constants that are not interpolated.
> Moreover
> > +    * their values will be the same between vertices.
> > +    *
> > +    * See the vertex element setup below.
> > +    */
> > +   ve[0].VertexBufferIndex = 0;
> > +   ve[0].Valid = true;
> > +   ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
> > +   ve[0].SourceElementOffset = 0;
> > +   ve[0].Component0Control = VFCOMP_STORE_0;
> > +   ve[0].Component1Control = VFCOMP_STORE_0;
> > +   ve[0].Component2Control = VFCOMP_STORE_0;
> > +   ve[0].Component3Control = VFCOMP_STORE_0;
> > +
> > +   ve[1].VertexBufferIndex = 0;
> > +   ve[1].Valid = true;
> > +   ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
> > +   ve[1].SourceElementOffset = 0;
> > +   ve[1].Component0Control = VFCOMP_STORE_SRC;
> > +   ve[1].Component1Control = VFCOMP_STORE_SRC;
> > +   ve[1].Component2Control = VFCOMP_STORE_0;
> > +   ve[1].Component3Control = VFCOMP_STORE_1_FP;
> > +
> > +   for (unsigned i = 0; i < num_varyings; ++i) {
> > +      ve[i + 2].VertexBufferIndex = 1;
> > +      ve[i + 2].Valid = true;
> > +      ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
> > +      ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
> > +      ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
> > +      ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
> > +      ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
> > +      ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
> > +   }
> > +
> > +   const unsigned num_dwords =
> > +      1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
> > +   uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS),
> num_dwords);
> > +
> > +   for (unsigned i = 0; i < num_elements; i++) {
> > +      GENX(VERTEX_ELEMENT_STATE_pack)(&batch, dw, &ve[i]);
> > +      dw += GENX(VERTEX_ELEMENT_STATE_length);
> > +   }
> > +
> > +#if GEN_GEN >= 8
> > +   blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
> > +
> > +   for (unsigned i = 0; i < num_elements; i++) {
> > +      blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {
> > +         vf.VertexElementIndex = i;
> > +         vf.InstancingEnable = false;
> > +      }
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
> > +      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
> > +   }
> > +#endif
> > +}
> > +
> > +static void
> > +blorp_emit_sf_config(struct blorp_batch batch,
> > +                     const struct brw_blorp_params *params)
> > +{
> > +   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
> > +
> > +#if GEN_GEN >= 8
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_SF), sf);
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
> > +      raster.CullMode = CULLMODE_NONE;
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
> > +      sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
> > +      sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
> > +      sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_
> data);
> > +      sbe.ForceVertexURBEntryReadLength = true;
> > +      sbe.ForceVertexURBEntryReadOffset = true;
> > +      sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
> > +
> > +#if GEN_GEN >= 9
> > +      for (unsigned i = 0; i < 32; i++)
> > +         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
> > +#endif
> > +   }
> > +
> > +#elif GEN_GEN >= 7
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_SF), sf) {
> > +      sf.FrontFaceFillMode = FILL_MODE_SOLID;
> > +      sf.BackFaceFillMode = FILL_MODE_SOLID;
> > +
> > +      sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
> > +         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
> > +
> > +#if GEN_GEN == 7
> > +      sf.DepthBufferSurfaceFormat = params->depth_format;
> > +#endif
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
> > +      sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
> > +      if (prog_data) {
> > +         sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
> > +         sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_
> data);
> > +         sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
> > +      } else {
> > +         sbe.NumberofSFOutputAttributes = 0;
> > +         sbe.VertexURBEntryReadLength = 1;
> > +      }
> > +   }
> > +
> > +#else /* GEN_GEN <= 6 */
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_SF), sf) {
> > +      sf.FrontFaceFillMode = FILL_MODE_SOLID;
> > +      sf.BackFaceFillMode = FILL_MODE_SOLID;
> > +
> > +      sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
> > +         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
> > +
> > +      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
> > +      if (prog_data) {
> > +         sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
> > +         sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
> > +         sf.ConstantInterpolationEnable = prog_data->flat_inputs;
> > +      } else {
> > +         sf.NumberofSFOutputAttributes = 0;
> > +         sf.VertexURBEntryReadLength = 1;
> > +      }
> > +   }
> > +
> > +#endif /* GEN_GEN */
> > +}
> > +
> > +static void
> > +blorp_emit_ps_config(struct blorp_batch batch,
> > +                     const struct brw_blorp_params *params)
> > +{
> > +   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
> > +
> > +#if GEN_GEN >= 8
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_WM), wm);
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_PS), ps) {
> > +      if (params->src.addr.buffer) {
> > +         ps.SamplerCount = 1; /* Up to 4 samplers */
> > +         ps.BindingTableEntryCount = 2;
> > +      } else {
> > +         ps.BindingTableEntryCount = 1;
> > +      }
> > +
> > +      ps.DispatchGRFStartRegisterForConstantSetupData0 =
> > +         prog_data->first_curbe_grf_0;
> > +      ps.DispatchGRFStartRegisterForConstantSetupData2 =
> > +         prog_data->first_curbe_grf_2;
> > +
> > +      ps._8PixelDispatchEnable = prog_data->dispatch_8;
> > +      ps._16PixelDispatchEnable = prog_data->dispatch_16;
> > +
> > +      ps.KernelStartPointer0 = params->wm_prog_kernel;
> > +      ps.KernelStartPointer2 =
> > +         params->wm_prog_kernel + prog_data->ksp_offset_2;
> > +
> > +      /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
> > +       * it implicitly scales for different GT levels (which have some # of
> > +       * PSDs).
> > +       *
> > +       * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
> > +       */
> > +      if (GEN_GEN >= 9)
> > +         ps.MaximumNumberofThreadsPerPSD = 64 - 1;
> > +      else
> > +         ps.MaximumNumberofThreadsPerPSD = 64 - 2;
> > +
> > +      switch (params->fast_clear_op) {
> > +#if GEN_GEN >= 9
> > +      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
> > +         ps.RenderTargetResolveType = RESOLVE_PARTIAL;
> > +         break;
> > +      case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */
> > +         ps.RenderTargetResolveType = RESOLVE_FULL;
> > +         break;
> > +#else
> > +      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
> > +         ps.RenderTargetResolveEnable = true;
> > +         break;
> > +#endif
> > +      case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
> > +         ps.RenderTargetFastClearEnable = true;
> > +         break;
> > +      }
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
> > +      psx.PixelShaderValid = true;
> > +
> > +      if (params->src.addr.buffer)
> > +         psx.PixelShaderKillsPixel = true;
> > +
> > +      psx.AttributeEnable = prog_data->num_varying_inputs > 0;
> > +
> > +      if (prog_data && prog_data->persample_msaa_dispatch)
> > +         psx.PixelShaderIsPerSample = true;
> > +   }
> > +
> > +#elif GEN_GEN >= 7
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_WM), wm) {
> > +      switch (params->hiz_op) {
> > +      case GEN6_HIZ_OP_DEPTH_CLEAR:
> > +         wm.DepthBufferClear = true;
> > +         break;
> > +      case GEN6_HIZ_OP_DEPTH_RESOLVE:
> > +         wm.DepthBufferResolveEnable = true;
> > +         break;
> > +      case GEN6_HIZ_OP_HIZ_RESOLVE:
> > +         wm.HierarchicalDepthBufferResolveEnable = true;
> > +         break;
> > +      case GEN6_HIZ_OP_NONE:
> > +         break;
> > +      default:
> > +         unreachable("not reached");
> > +      }
> > +
> > +      if (prog_data)
> > +         wm.ThreadDispatchEnable = true;
> > +
> > +      if (params->src.addr.buffer)
> > +         wm.PixelShaderKillPixel = true;
> > +
> > +      if (params->dst.surf.samples > 1) {
> > +         wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
> > +         wm.MultisampleDispatchMode =
> > +            (prog_data && prog_data->persample_msaa_dispatch) ?
> > +            MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
> > +      } else {
> > +         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
> > +         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
> > +      }
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_PS), ps) {
> > +      ps.MaximumNumberofThreads = batch.blorp->isl_dev->info->max_wm_threads - 1;
> > +
> > +#if GEN_IS_HASWELL
> > +      ps.SampleMask = 1;
> > +#endif
> > +
> > +      if (prog_data) {
> > +         ps.DispatchGRFStartRegisterforConstantSetupData0 =
> > +            prog_data->first_curbe_grf_0;
> > +         ps.DispatchGRFStartRegisterforConstantSetupData2 =
> > +            prog_data->first_curbe_grf_2;
> > +
> > +         ps.KernelStartPointer0 = params->wm_prog_kernel;
> > +         ps.KernelStartPointer2 =
> > +            params->wm_prog_kernel + prog_data->ksp_offset_2;
> > +
> > +         ps._8PixelDispatchEnable = prog_data->dispatch_8;
> > +         ps._16PixelDispatchEnable = prog_data->dispatch_16;
> > +
> > +         ps.AttributeEnable = prog_data->num_varying_inputs > 0;
> > +      } else {
> > +         /* Gen7 hardware gets angry if we don't enable at least one dispatch
> > +          * mode, so just enable 16-pixel dispatch if we don't have a program.
> > +          */
> > +         ps._16PixelDispatchEnable = true;
> > +      }
> > +
> > +      if (params->src.addr.buffer)
> > +         ps.SamplerCount = 1; /* Up to 4 samplers */
> > +
> > +      switch (params->fast_clear_op) {
> > +      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
> > +         ps.RenderTargetResolveEnable = true;
> > +         break;
> > +      case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
> > +         ps.RenderTargetFastClearEnable = true;
> > +         break;
> > +      }
> > +   }
> > +
> > +#else /* GEN_GEN <= 6 */
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_WM), wm) {
> > +      wm.MaximumNumberofThreads = batch.blorp->isl_dev->info->max_wm_threads - 1;
> > +
> > +      switch (params->hiz_op) {
> > +      case GEN6_HIZ_OP_DEPTH_CLEAR:
> > +         wm.DepthBufferClear = true;
> > +         break;
> > +      case GEN6_HIZ_OP_DEPTH_RESOLVE:
> > +         wm.DepthBufferResolveEnable = true;
> > +         break;
> > +      case GEN6_HIZ_OP_HIZ_RESOLVE:
> > +         wm.HierarchicalDepthBufferResolveEnable = true;
> > +         break;
> > +      case GEN6_HIZ_OP_NONE:
> > +         break;
> > +      default:
> > +         unreachable("not reached");
> > +      }
> > +
> > +      if (prog_data) {
> > +         wm.ThreadDispatchEnable = true;
> > +
> > +         wm.DispatchGRFStartRegisterforConstantSetupData0 =
> > +            prog_data->first_curbe_grf_0;
> > +         wm.DispatchGRFStartRegisterforConstantSetupData2 =
> > +            prog_data->first_curbe_grf_2;
> > +
> > +         wm.KernelStartPointer0 = params->wm_prog_kernel;
> > +         wm.KernelStartPointer2 =
> > +            params->wm_prog_kernel + prog_data->ksp_offset_2;
> > +
> > +         wm._8PixelDispatchEnable = prog_data->dispatch_8;
> > +         wm._16PixelDispatchEnable = prog_data->dispatch_16;
> > +
> > +         wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
> > +      }
> > +
> > +      if (params->src.addr.buffer) {
> > +         wm.SamplerCount = 1; /* Up to 4 samplers */
> > +         wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
> > +      }
> > +
> > +      if (params->dst.surf.samples > 1) {
> > +         wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
> > +         wm.MultisampleDispatchMode =
> > +            (prog_data && prog_data->persample_msaa_dispatch) ?
> > +            MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
> > +      } else {
> > +         wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
> > +         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
> > +      }
> > +   }
> > +
> > +#endif /* GEN_GEN */
> > +}
> > +
> > +
> > +static void
> > +blorp_emit_depth_stencil_config(struct blorp_batch batch,
> > +                                const struct brw_blorp_params *params)
> > +{
> > +#if GEN_GEN >= 7
> > +   const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
> > +#else
> > +   const uint32_t mocs = 0;
> > +#endif
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
> > +      switch (params->depth.surf.dim) {
> > +      case ISL_SURF_DIM_1D:
> > +         db.SurfaceType = SURFTYPE_1D;
> > +         break;
> > +      case ISL_SURF_DIM_2D:
> > +         db.SurfaceType = SURFTYPE_2D;
> > +         break;
> > +      case ISL_SURF_DIM_3D:
> > +         db.SurfaceType = SURFTYPE_3D;
> > +         break;
> > +      }
> > +
> > +      db.SurfaceFormat = params->depth_format;
> > +
> > +#if GEN_GEN >= 7
> > +      db.DepthWriteEnable = true;
> > +#endif
> > +
> > +#if GEN_GEN <= 6
> > +      db.TiledSurface = true;
> > +      db.TileWalk = TILEWALK_YMAJOR;
> > +      db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
> > +      db.SeparateStencilBufferEnable = true;
> > +#endif
> > +
> > +      db.HierarchicalDepthBufferEnable = true;
> > +
> > +      db.Width = params->depth.surf.logical_level0_px.width - 1;
> > +      db.Height = params->depth.surf.logical_level0_px.height - 1;
> > +      db.RenderTargetViewExtent = db.Depth =
> > +         MAX2(params->depth.surf.logical_level0_px.depth,
> > +              params->depth.surf.logical_level0_px.array_len) - 1;
> > +
> > +      db.LOD = params->depth.view.base_level;
> > +      db.MinimumArrayElement = params->depth.view.base_array_layer;
> > +
> > +      db.SurfacePitch = params->depth.surf.row_pitch - 1;
> > +      db.SurfaceBaseAddress = params->depth.addr;
> > +      db.DepthBufferMOCS = mocs;
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
> > +      hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
> > +      hiz.SurfaceBaseAddress = params->depth.aux_addr;
> > +      hiz.HierarchicalDepthBufferMOCS = mocs;
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
> > +}
> > +
> > +static uint32_t
> > +blorp_emit_blend_state(struct blorp_batch batch,
> > +                       const struct brw_blorp_params *params)
> > +{
> > +   struct GENX(BLEND_STATE) blend;
> > +   memset(&blend, 0, sizeof(blend));
> > +
> > +   for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
> > +      blend.Entry[i].PreBlendColorClampEnable = true;
> > +      blend.Entry[i].PostBlendColorClampEnable = true;
> > +      blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
> > +
> > +      blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
> > +      blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]
> ;
> > +      blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
> > +      blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]
> ;
> > +   }
> > +
> > +   uint32_t offset;
> > +   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > +                                           AUB_TRACE_BLEND_STATE,
> > +                                           GENX(BLEND_STATE_length) * 4,
> > +                                           64, &offset);
> > +   GENX(BLEND_STATE_pack)(NULL, state, &blend);
> > +
> > +#if GEN_GEN >= 7
> > +   blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
> > +      sp.BlendStatePointer = offset;
> > +#if GEN_GEN >= 8
> > +      sp.BlendStatePointerValid = true;
> > +#endif
> > +   }
> > +#endif
> > +
> > +#if GEN_GEN >= 8
> > +   blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
> > +      ps_blend.HasWriteableRT = true;
> > +   }
> > +#endif
> > +
> > +   return offset;
> > +}
> > +
> > +static uint32_t
> > +blorp_emit_color_calc_state(struct blorp_batch batch,
> > +                            const struct brw_blorp_params *params)
> > +{
> > +   uint32_t offset;
> > +   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > +                                           AUB_TRACE_CC_STATE,
> > +                                           GENX(COLOR_CALC_STATE_length) * 4,
> > +                                           64, &offset);
> > +   memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
> > +
> > +#if GEN_GEN >= 7
> > +   blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
> > +      sp.ColorCalcStatePointer = offset;
> > +#if GEN_GEN >= 8
> > +      sp.ColorCalcStatePointerValid = true;
> > +#endif
> > +   }
> > +#endif
> > +
> > +   return offset;
> > +}
> > +
> > +static uint32_t
> > +blorp_emit_depth_stencil_state(struct blorp_batch batch,
> > +                               const struct brw_blorp_params *params)
> > +{
> > +#if GEN_GEN >= 8
> > +
> > +   /* On gen8+, DEPTH_STENCIL state is simply an instruction */
> > +   blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
> > +   return 0;
> > +
> > +#else /* GEN_GEN <= 7 */
> > +
> > +   /* See the following sections of the Sandy Bridge PRM, Volume 1,
> Part2:
> > +    *   - 7.5.3.1 Depth Buffer Clear
> > +    *   - 7.5.3.2 Depth Buffer Resolve
> > +    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
> > +    */
> > +   struct GENX(DEPTH_STENCIL_STATE) ds = {
> > +      .DepthBufferWriteEnable = true,
> > +   };
> > +
> > +   if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
> > +      ds.DepthTestEnable = true;
> > +      ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
> > +   }
> > +
> > +   uint32_t offset;
> > +   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > +                                           AUB_TRACE_DEPTH_STENCIL_STATE,
> > +                                           GENX(DEPTH_STENCIL_STATE_length) * 4,
> > +                                           64, &offset);
> > +   GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
> > +
> > +#if GEN_GEN >= 7
> > +   blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
> > +      sp.PointertoDEPTH_STENCIL_STATE = offset;
> > +   }
> > +#endif
> > +
> > +   return offset;
> > +
> > +#endif /* GEN_GEN */
> > +}
> > +
> > +struct surface_state_info {
> > +   unsigned num_dwords;
> > +   unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in
> bytes */
> > +   unsigned reloc_dw;
> > +   unsigned aux_reloc_dw;
> > +};
> > +
> > +static const struct surface_state_info surface_state_infos[] = {
> > +   [6] = {6,  32, 1,  0},
> > +   [7] = {8,  32, 1,  6},
> > +   [8] = {13, 64, 8,  10},
> > +   [9] = {16, 64, 8,  10},
> > +};
> > +
> > +static void
> > +blorp_emit_surface_state(struct blorp_context *blorp,
> > +                         const struct brw_blorp_surface_info *surface,
> > +                         uint32_t *state, uint32_t state_offset,
> > +                         bool is_render_target)
> > +{
> > +   const struct surface_state_info ss_info = surface_state_infos[GEN_GEN];
> > +
> > +   struct isl_surf surf = surface->surf;
> > +
> > +   if (surf.dim == ISL_SURF_DIM_1D &&
> > +       surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) {
> > +      assert(surf.logical_level0_px.height == 1);
> > +      surf.dim = ISL_SURF_DIM_2D;
> > +   }
> > +
> > +   /* Blorp doesn't support HiZ in any of the blit or slow-clear paths
> */
> > +   enum isl_aux_usage aux_usage = surface->aux_usage;
> > +   if (aux_usage == ISL_AUX_USAGE_HIZ)
> > +      aux_usage = ISL_AUX_USAGE_NONE;
> > +
> > +   const uint32_t mocs = is_render_target ? blorp->mocs.rb :
> blorp->mocs.tex;
> > +
> > +   isl_surf_fill_state(blorp->isl_dev, state,
> > +                       .surf = &surf, .view = &surface->view,
> > +                       .aux_surf = &surface->aux_surf, .aux_usage =
> aux_usage,
> > +                       .mocs = mocs, .clear_color =
> surface->clear_color,
> > +                       .x_offset_sa = surface->tile_x_sa,
> > +                       .y_offset_sa = surface->tile_y_sa);
> > +
> > +   blorp_surface_reloc(blorp, state_offset + ss_info.reloc_dw * 4,
> > +                       surface->addr, 0);
> > +
> > +   if (aux_usage != ISL_AUX_USAGE_NONE) {
> > +      /* On gen7 and prior, the bottom 12 bits of the MCS base address are
> > +       * used to store other information.  This should be ok, however, because
> > +       * surface buffer addresses are always 4K page aligned.
> > +       */
> > +      assert((surface->aux_addr.offset & 0xfff) == 0);
> > +      blorp_surface_reloc(blorp, state_offset + ss_info.aux_reloc_dw * 4,
> > +                          surface->aux_addr, state[ss_info.aux_reloc_dw]);
> > +   }
> > +}
> > +
> > +static void
> > +blorp_emit_surface_states(struct blorp_batch batch,
> > +                          const struct brw_blorp_params *params)
> > +{
> > +   uint32_t bind_offset, *bind_map;
> > +   void *surface_maps[2];
> > +
> > +   const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4;
> > +   const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ?
> 64 : 32;
> > +
> > +   unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL);
> > +   blorp_alloc_binding_table(batch.blorp, num_surfaces, ss_size,
> ss_align,
> > +                             &bind_offset, &bind_map, surface_maps);
> > +
> > +   blorp_emit_surface_state(batch.blorp, &params->dst,
> > +                            surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
> > +                            bind_map[BLORP_RENDERBUFFER_BT_INDEX],
> true);
> > +   if (params->src.addr.buffer) {
> > +      blorp_emit_surface_state(batch.blorp, &params->src,
> > +                               surface_maps[BLORP_TEXTURE_BT_INDEX],
> > +                               bind_map[BLORP_TEXTURE_BT_INDEX],
> false);
> > +   }
> > +
> > +#if GEN_GEN >= 7
> > +   blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
> > +      bt.PointertoPSBindingTable = bind_offset;
> > +   }
> > +#else
> > +   blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
> > +      bt.PSBindingTableChange = true;
> > +      bt.PointertoPSBindingTable = bind_offset;
> > +   }
> > +#endif
> > +}
> > +
> > +static void
> > +blorp_emit_sampler_state(struct blorp_batch batch,
> > +                         const struct brw_blorp_params *params)
> > +{
> > +   struct GENX(SAMPLER_STATE) sampler = {
> > +      .MipModeFilter = MIPFILTER_NONE,
> > +      .MagModeFilter = MAPFILTER_LINEAR,
> > +      .MinModeFilter = MAPFILTER_LINEAR,
> > +      .MinLOD = 0,
> > +      .MaxLOD = 0,
> > +      .TCXAddressControlMode = TCM_CLAMP,
> > +      .TCYAddressControlMode = TCM_CLAMP,
> > +      .TCZAddressControlMode = TCM_CLAMP,
> > +      .MaximumAnisotropy = RATIO21,
> > +      .RAddressMinFilterRoundingEnable = true,
> > +      .RAddressMagFilterRoundingEnable = true,
> > +      .VAddressMinFilterRoundingEnable = true,
> > +      .VAddressMagFilterRoundingEnable = true,
> > +      .UAddressMinFilterRoundingEnable = true,
> > +      .UAddressMagFilterRoundingEnable = true,
> > +      .NonnormalizedCoordinateEnable = true,
> > +   };
> > +
> > +   uint32_t offset;
> > +   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > +                                           AUB_TRACE_SAMPLER_STATE,
> > +                                           GENX(SAMPLER_STATE_length) *
> 4,
> > +                                           32, &offset);
> > +   GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
> > +
> > +#if GEN_GEN >= 7
> > +   blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
> > +      ssp.PointertoPSSamplerState = offset;
> > +   }
> > +#else
> > +   blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
> > +      ssp.VSSamplerStateChange = true;
> > +      ssp.GSSamplerStateChange = true;
> > +      ssp.PSSamplerStateChange = true;
> > +      ssp.PointertoPSSamplerState = offset;
> > +   }
> > +#endif
> > +}
> > +
> > +/* 3DSTATE_VIEWPORT_STATE_POINTERS */
> > +static void
> > +blorp_emit_viewport_state(struct blorp_batch batch,
> > +                          const struct brw_blorp_params *params)
> > +{
> > +   uint32_t cc_vp_offset;
> > +
> > +   void *state = blorp_alloc_dynamic_state(batch.blorp,
> > +                                           AUB_TRACE_CC_VP_STATE,
> > +                                           GENX(CC_VIEWPORT_length) *
> 4, 32,
> > +                                           &cc_vp_offset);
> > +
> > +   GENX(CC_VIEWPORT_pack)(&batch, state,
> > +      &(struct GENX(CC_VIEWPORT)) {
> > +         .MinimumDepth = 0.0,
> > +         .MaximumDepth = 1.0,
> > +      });
> > +
> > +#if GEN_GEN >= 7
> > +   blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
> > +      vsp.CCViewportPointer = cc_vp_offset;
> > +   }
> > +#else
> > +   blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
> > +      vsp.CCViewportStateChange = true;
> > +      vsp.PointertoCC_VIEWPORT = cc_vp_offset;
> > +   }
> > +#endif
> > +}
> > +
> > +
> > +/**
> > + * \brief Execute a blit or render pass operation.
> > + *
> > + * To execute the operation, this function manually constructs and emits a
> > + * batch to draw a rectangle primitive. The batchbuffer is flushed before
> > + * constructing and after emitting the batch.
> > + *
> > + * This function alters no GL state.
> > + */
> > +static void
> > +blorp_exec(struct blorp_context *blorp, void *batch_data,
> > +           const struct brw_blorp_params *params)
> > +{
> > +   struct blorp_batch batch = {
> > +      .blorp = blorp,
> > +      .batch = batch_data,
> > +   };
> > +
> > +   uint32_t blend_state_offset = 0;
> > +   uint32_t color_calc_state_offset = 0;
> > +   uint32_t depth_stencil_state_offset;
> > +
> > +   blorp_emit_vertex_buffers(batch, params);
> > +   blorp_emit_vertex_elements(batch, params);
> > +
> > +   emit_urb_config(batch, params);
> > +
> > +   if (params->wm_prog_data) {
> > +      blend_state_offset = blorp_emit_blend_state(batch, params);
> > +      color_calc_state_offset = blorp_emit_color_calc_state(batch,
> params);
> > +   }
> > +   depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch,
> params);
> > +
> > +#if GEN_GEN <= 6
> > +   /* The dynamic state emit helpers emit their own STATE_POINTERS packets on
> > +    * gen7+.  However, on gen6 and earlier, they're all lumped together in
> > +    * one CC_STATE_POINTERS packet so we have to emit that here.
> > +    */
> > +   blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
> > +      cc.BLEND_STATEChange = true;
> > +      cc.COLOR_CALC_STATEChange = true;
> > +      cc.DEPTH_STENCIL_STATEChange = true;
> > +      cc.PointertoBLEND_STATE = blend_state_offset;
> > +      cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
> > +      cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
> > +   }
> > +#else
> > +   (void)blend_state_offset;
> > +   (void)color_calc_state_offset;
> > +   (void)depth_stencil_state_offset;
> > +#endif
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs);
> > +#if GEN_GEN >= 7
> > +   blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs);
> > +   blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS);
> > +#endif
> > +   blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs);
> > +   blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);
> > +
> > +   if (params->wm_prog_data)
> > +      blorp_emit_surface_states(batch, params);
> > +
> > +   if (params->src.addr.buffer)
> > +      blorp_emit_sampler_state(batch, params);
> > +
> > +   blorp_emit_3dstate_multisample(batch.blorp, batch.batch,
> > +                                  params->dst.surf.samples);
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) {
> > +      mask.SampleMask = (1 << params->dst.surf.samples) - 1;
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_VS), vs);
> > +#if GEN_GEN >= 7
> > +   blorp_emit(batch, GENX(3DSTATE_HS), hs);
> > +   blorp_emit(batch, GENX(3DSTATE_TE), te);
> > +   blorp_emit(batch, GENX(3DSTATE_DS), DS);
> > +   blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so);
> > +#endif
> > +   blorp_emit(batch, GENX(3DSTATE_GS), gs);
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_CLIP), clip) {
> > +      clip.PerspectiveDivideDisable = true;
> > +   }
> > +
> > +   blorp_emit_sf_config(batch, params);
> > +   blorp_emit_ps_config(batch, params);
> > +
> > +   blorp_emit_viewport_state(batch, params);
> > +
> > +   if (params->depth.addr.buffer) {
> > +      blorp_emit_depth_stencil_config(batch, params);
> > +   } else {
> > +      blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
> > +         db.SurfaceType = SURFTYPE_NULL;
> > +         db.SurfaceFormat = D32_FLOAT;
> > +      }
> > +      blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
> > +      blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) {
> > +      clear.DepthClearValueValid = true;
> > +      clear.DepthClearValue = params->depth.clear_color.u32[0];
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
> > +      rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
> > +      rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
> > +   }
> > +
> > +   blorp_emit(batch, GENX(3DPRIMITIVE), prim) {
> > +      prim.VertexAccessType = SEQUENTIAL;
> > +      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
> > +      prim.VertexCountPerInstance = 3;
> > +      prim.InstanceCount = params->num_layers;
> > +   }
> > +}
> > --
> > 2.5.0.400.gff86faf
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160823/105afb9c/attachment-0001.html>


More information about the mesa-dev mailing list