[Mesa-dev] [PATCH 13/40] i965/blorp: Pipeline upload support for gen8

Kenneth Graunke kenneth at whitecape.org
Mon Apr 18 07:22:06 UTC 2016


On Saturday, April 16, 2016 4:42:41 PM PDT Topi Pohjolainen wrote:
> Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> ---
>  src/mesa/drivers/dri/i965/Makefile.sources |   1 +
>  src/mesa/drivers/dri/i965/brw_blorp.h      |   3 +
>  src/mesa/drivers/dri/i965/gen8_blorp.cpp   | 694 +++++++++++++++++++++++++++++
>  3 files changed, 698 insertions(+)
>  create mode 100644 src/mesa/drivers/dri/i965/gen8_blorp.cpp
> 
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index c314d74..0143c27 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -208,6 +208,7 @@ i965_FILES = \
>  	gen7_wm_state.c \
>  	gen7_wm_surface_state.c \
>  	gen8_blend_state.c \
> +	gen8_blorp.cpp \
>  	gen8_depth_state.c \
>  	gen8_disable.c \
>  	gen8_draw_upload.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 9a0fffc..8e30770 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
> @@ -246,6 +246,9 @@ void
>  gen7_blorp_exec(struct brw_context *brw,
>                  const brw_blorp_params *params);
>  
> +void
> +gen8_blorp_exec(struct brw_context *brw, const brw_blorp_params *params);
> +
>  /**
>   * Parameters for a HiZ or depth resolve operation.
>   *
> diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.cpp b/src/mesa/drivers/dri/i965/gen8_blorp.cpp
> new file mode 100644
> index 0000000..0c15d36
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/gen8_blorp.cpp
> @@ -0,0 +1,694 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <assert.h>
> +
> +#include "intel_batchbuffer.h"
> +#include "intel_fbo.h"
> +#include "intel_mipmap_tree.h"
> +
> +#include "brw_context.h"
> +#include "brw_defines.h"
> +#include "brw_state.h"
> +
> +#include "brw_blorp.h"
> +
> +
> +/* SURFACE_STATE for renderbuffer or texture surface (see
> + * brw_update_renderbuffer_surface and brw_update_texture_surface)
> + */
> +static uint32_t
> +gen8_blorp_emit_surface_state(struct brw_context *brw,
> +                              const brw_blorp_surface_info *surface,
> +                              uint32_t read_domains, uint32_t write_domain,
> +                              bool is_render_target)
> +{
> +   uint32_t wm_surf_offset;
> +   const struct intel_mipmap_tree *mt = surface->mt;
> +   const uint32_t mocs_wb = is_render_target ?
> +                               (brw->gen >= 9 ? SKL_MOCS_PTE : BDW_MOCS_PTE) :
> +                               (brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB);
> +   const uint32_t tiling = surface->map_stencil_as_y_tiled
> +      ? I915_TILING_Y : mt->tiling;
> +   uint32_t tile_x, tile_y;
> +
> +   uint32_t *surf = gen8_allocate_surface_state(brw, &wm_surf_offset, -1);
> +
> +   surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
> +             surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT |
> +             gen8_vertical_alignment(brw, mt, BRW_SURFACE_2D) |
> +             gen8_horizontal_alignment(brw, mt, BRW_SURFACE_2D) |
> +             gen8_surface_tiling_mode(tiling);
> +
> +   surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
> +
> +   surf[2] = SET_FIELD(surface->width - 1, GEN7_SURFACE_WIDTH) |
> +             SET_FIELD(surface->height - 1, GEN7_SURFACE_HEIGHT);
> +
> +   uint32_t pitch_bytes = mt->pitch;
> +   if (surface->map_stencil_as_y_tiled)
> +      pitch_bytes *= 2;
> +   surf[3] = pitch_bytes - 1;
> +
> +   surf[4] = gen7_surface_msaa_bits(surface->num_samples,
> +                                    surface->msaa_layout);
> +
> +   if (surface->mt->mcs_mt) {
> +      surf[6] = SET_FIELD(surface->mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
> +                SET_FIELD((surface->mt->mcs_mt->pitch / 128) - 1,
> +                          GEN8_SURFACE_AUX_PITCH) |
> +                GEN8_SURFACE_AUX_MODE_MCS;
> +   } else {
> +      surf[6] = 0;
> +   }
> +
> +   gen8_emit_fast_clear_color(brw, mt, surf);
> +   surf[7] |= SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
> +              SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
> +              SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
> +              SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
> +
> +    /* reloc */
> +   *((uint64_t *)&surf[8]) =
> +      surface->compute_tile_offsets(&tile_x, &tile_y) + mt->bo->offset64;
> +
> +   /* Note that the low bits of these fields are missing, so there's the
> +    * possibility of getting in trouble.
> +    */
> +   assert(tile_x % 4 == 0);
> +   assert(tile_y % 4 == 0);
> +   surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
> +             SET_FIELD(tile_y / 4, GEN8_SURFACE_Y_OFFSET);
> +
> +   if (brw->gen >= 9) {
> +      /* Disable Mip Tail by setting a large value. */
> +      surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD);
> +   }
> +
> +   if (surface->mt->mcs_mt) {
> +      *((uint64_t *) &surf[10]) = surface->mt->mcs_mt->bo->offset64;
> +      drm_intel_bo_emit_reloc(brw->batch.bo,
> +                              wm_surf_offset + 10 * 4,
> +                              surface->mt->mcs_mt->bo, 0,
> +                              read_domains, write_domain);
> +   }
> +
> +   /* Emit relocation to surface contents */
> +   drm_intel_bo_emit_reloc(brw->batch.bo,
> +                           wm_surf_offset + 8 * 4,
> +                           mt->bo,
> +                           surf[8] - mt->bo->offset64,
> +                           read_domains, write_domain);
> +
> +   return wm_surf_offset;
> +}
> +
> +static uint32_t
> +gen8_blorp_emit_blend_state(struct brw_context *brw,
> +                            const brw_blorp_params *params)
> +{
> +   uint32_t blend_state_offset;
> +
> +   assume(params->num_draw_buffers);
> +
> +   const unsigned size = 4 + 8 * params->num_draw_buffers;
> +   uint32_t *blend = (uint32_t *)brw_state_batch(brw, AUB_TRACE_BLEND_STATE,
> +                                                 size, 64,
> +                                                 &blend_state_offset);
> +   memset(blend, 0, size);
> +
> +   for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
> +      blend[1 + 2 * i + 1] = GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE |
> +                             GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE |
> +                             GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT;
> +   }
> +
> +   return blend_state_offset;
> +}
> +
> +static void
> +gen8_blorp_emit_disable_constant_state(struct brw_context *brw,
> +                                       unsigned opcode)
> +{
> +   BEGIN_BATCH(11);
> +   OUT_BATCH(opcode << 16 | (11 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_disable_binding_table(struct brw_context *brw,
> +                                      unsigned opcode)
> +{
> +
> +   BEGIN_BATCH(2);
> +   OUT_BATCH(opcode << 16 | (2 - 2));
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +/* 3DSTATE_VS
> + *
> + * Disable vertex shader.
> + */
> +static void
> +gen8_blorp_emit_vs_disable(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(9);
> +   OUT_BATCH(_3DSTATE_VS << 16 | (9 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +/* 3DSTATE_HS
> + *
> + * Disable the hull shader.
> + */
> +static void
> +gen8_blorp_emit_hs_disable(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(9);
> +   OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +/* 3DSTATE_DS
> + *
> + * Disable the domain shader.
> + */
> +static void
> +gen8_blorp_emit_ds_disable(struct brw_context *brw)
> +{
> +   const int ds_pkt_len = brw->gen >= 9 ? 11 : 9;
> +   BEGIN_BATCH(ds_pkt_len);
> +   OUT_BATCH(_3DSTATE_DS << 16 | (ds_pkt_len - 2));
> +   for (int i = 0; i < ds_pkt_len - 1; i++)
> +      OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +/* 3DSTATE_GS
> + *
> + * Disable the geometry shader.
> + */
> +static void
> +gen8_blorp_emit_gs_disable(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(10);
> +   OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +/* 3DSTATE_STREAMOUT
> + *
> + * Disable streamout.
> + */
> +static void
> +gen8_blorp_emit_streamout_disable(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(5);
> +   OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (5 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_raster_state(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(5);
> +   OUT_BATCH(_3DSTATE_RASTER << 16 | (5 - 2));
> +   OUT_BATCH(GEN8_RASTER_CULL_NONE | GEN8_RASTER_FRONT_WINDING_CCW);

I don't think we should be setting GEN8_RASTER_FRONT_WINDING_CCW - we
didn't set GEN6_SF_WINDING_CCW on Gen6-7.

> +   OUT_BATCH_F(0);
> +   OUT_BATCH_F(0);
> +   OUT_BATCH_F(0);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_sbe_state(struct brw_context *brw,
> +                          const brw_blorp_params *params)
> +{
> +   /* 3DSTATE_SBE */
> +   {
> +      const unsigned sbe_cmd_length = brw->gen == 8 ? 4 : 6;
> +      BEGIN_BATCH(sbe_cmd_length);
> +      OUT_BATCH(_3DSTATE_SBE << 16 | (sbe_cmd_length - 2));
> +      OUT_BATCH(GEN7_SBE_SWIZZLE_ENABLE |
> +                params->num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT |
> +                1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
> +                BRW_SF_URB_ENTRY_READ_OFFSET <<
> +                   GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT |
> +                GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH |
> +                GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      if (sbe_cmd_length >= 6) {
> +         OUT_BATCH(GEN9_SBE_ACTIVE_COMPONENT_XYZW << (0 << 1));
> +         OUT_BATCH(0);
> +      }
> +      ADVANCE_BATCH();
> +   }
> +
> +   {
> +      BEGIN_BATCH(11);
> +      OUT_BATCH(_3DSTATE_SBE_SWIZ << 16 | (11 - 2));
> +
> +      /* Output DWords 1 through 8: */
> +      for (int i = 0; i < 8; i++) {
> +         OUT_BATCH(0);
> +      }
> +
> +      OUT_BATCH(0); /* wrapshortest enables 0-7 */
> +      OUT_BATCH(0); /* wrapshortest enables 8-15 */
> +      ADVANCE_BATCH();
> +   }
> +}
> +
> +static void
> +gen8_blorp_emit_sf_config(struct brw_context *brw)
> +{
> +   /* See gen6_blorp_emit_sf_config() */
> +   BEGIN_BATCH(4);
> +   OUT_BATCH(_3DSTATE_SF << 16 | (4 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(GEN6_SF_LINE_AA_MODE_TRUE);
> +   ADVANCE_BATCH();
> +}
> +
> +/**
> + * Disable thread dispatch (dw5.19) and enable the HiZ op.
> + */
> +static void
> +gen8_blorp_emit_wm_state(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(2);
> +   OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2));
> +   OUT_BATCH(GEN7_WM_LINE_AA_WIDTH_1_0 |
> +             GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 |
> +             GEN7_WM_POINT_RASTRULE_UPPER_RIGHT);
> +   ADVANCE_BATCH();
> +}
> +
> +/**
> + * 3DSTATE_PS
> + *
> + * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
> + * that, thread dispatch info must still be specified.
> + *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
> + *       valid range for this field is [0x3, 0x2f].
> + *     - A dispatch mode must be given; that is, at least one of the
> + *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
> + *       discovered through simulator error messages.
> + */
> +static void
> +gen8_blorp_emit_ps_config(struct brw_context *brw,
> +                          const brw_blorp_params *params,
> +                          uint32_t prog_offset,
> +                          brw_blorp_prog_data *prog_data)
> +{
> +   uint32_t dw3, dw5, dw6, dw7;
> +
> +   dw3 = dw5 = dw6 = dw7 = 0;
> +   dw3 |= GEN7_PS_VECTOR_MASK_ENABLE;
> +
> +   dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
> +
> +   dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
> +   dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */
> +   dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
> +   dw7 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
> +
> +   /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
> +    * it implicitly scales for different GT levels (which have some # of PSDs).
> +    *
> +    * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
> +    */
> +   if (brw->gen >= 9)
> +      dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT;
> +   else
> +      dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT;
> +
> +   dw6 |= GEN7_PS_POSOFFSET_NONE;
> +
> +   BEGIN_BATCH(12);
> +   OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
> +   OUT_BATCH(prog_offset);
> +   OUT_BATCH(0);
> +   OUT_BATCH(dw3);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(dw6);
> +   OUT_BATCH(dw7);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_ps_blend(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(2);
> +   OUT_BATCH(_3DSTATE_PS_BLEND << 16 | (2 - 2));
> +   OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_ps_extra(struct brw_context *brw,
> +                         const brw_blorp_params *params,
> +                         const brw_blorp_prog_data *prog_data)
> +{
> +   uint32_t dw1 = 0;
> +
> +   dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
> +
> +   if (params->src.mt) {

This might be better as:

   if (params->use_wm_prog) {

It should be equivalent, as I don't think we execute programs for
clears, but...maybe a bit clearer?  It's at least what Gen7 does.

> +      dw1 |= GEN8_PSX_KILL_ENABLE;
> +      dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
> +   }
> +
> +   if (params->dst.num_samples > 1 && prog_data &&
> +       prog_data->persample_msaa_dispatch)
> +      dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;

Maybe put this block inside the use_wm_prog condition, like Gen7...

> +
> +   BEGIN_BATCH(2);
> +   OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
> +   OUT_BATCH(dw1);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_depth_disable(struct brw_context *brw)
> +{
> +   /* Skip repeated NULL depth/stencil emits (think 2D rendering). */
> +   if (brw->no_depth_or_stencil)
> +      return;
> +
> +   brw_emit_depth_stall_flushes(brw);
> +
> +   BEGIN_BATCH(8);
> +   OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (8 - 2));
> +   OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +
> +   BEGIN_BATCH(5);
> +   OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +
> +   BEGIN_BATCH(5);
> +   OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_vf_topology(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(2);
> +   OUT_BATCH(_3DSTATE_VF_TOPOLOGY << 16 | (2 - 2));
> +   OUT_BATCH(_3DPRIM_RECTLIST);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_vf_sys_gen_vals_state(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(2);
> +   OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_vf_instancing_state(struct brw_context *brw,
> +                                    unsigned num_elems)
> +{
> +   for (unsigned i = 0; i < num_elems; ++i) {
> +      BEGIN_BATCH(3);
> +      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
> +      OUT_BATCH(i);
> +      OUT_BATCH(0);
> +      ADVANCE_BATCH();
> +   }
> +}
> +
> +static void
> +gen8_blorp_emit_vf_state(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(2);
> +   OUT_BATCH(_3DSTATE_VF << 16 | (2 - 2));
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_depth_stencil_state(struct brw_context *brw,
> +                                    const brw_blorp_params *params)
> +{
> +   const unsigned pkt_len = brw->gen >= 9 ? 4 : 3;
> +
> +   BEGIN_BATCH(pkt_len);
> +   OUT_BATCH(_3DSTATE_WM_DEPTH_STENCIL << 16 | (pkt_len - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   if (pkt_len > 3) {
> +      OUT_BATCH(0);
> +   }
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +gen8_blorp_emit_constant_ps(struct brw_context *brw,
> +                            uint32_t wm_push_const_offset)
> +{
> +   const int dwords = brw->gen >= 8 ? 11 : 7;
> +   BEGIN_BATCH(dwords);
> +   OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (dwords - 2));
> +
> +   if (brw->gen >= 9) {
> +      OUT_BATCH(0);
> +      OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS);
> +   } else {
> +      OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS);
> +      OUT_BATCH(0);
> +   }
> +
> +   if (brw->gen >= 9) {
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0,
> +                  wm_push_const_offset);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +   } else {
> +      OUT_BATCH(wm_push_const_offset);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +      OUT_BATCH(0);
> +   }
> +
> +   ADVANCE_BATCH();
> +}
> +
> +static uint32_t
> +gen8_blorp_emit_surface_states(struct brw_context *brw,
> +                               const brw_blorp_params *params)
> +{
> +   uint32_t wm_surf_offset_renderbuffer;
> +   uint32_t wm_surf_offset_texture = 0;
> +
> +   intel_miptree_used_for_rendering(params->dst.mt);
> +
> +   wm_surf_offset_renderbuffer =
> +      gen8_blorp_emit_surface_state(brw, &params->dst,
> +                                    I915_GEM_DOMAIN_RENDER,
> +                                    I915_GEM_DOMAIN_RENDER,
> +                                    true /* is_render_target */);
> +   if (params->src.mt) {
> +      const brw_blorp_surface_info *surface = &params->src;
> +      intel_mipmap_tree *mt = surface->mt;
> +
> +      /* Textures are always sampled as 2D. */
> +      const bool is_cube = mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
> +                           mt->target == GL_TEXTURE_CUBE_MAP;
> +      const unsigned depth = (is_cube ? 6 : 1) * mt->logical_depth0;
> +      const GLenum target = is_cube ? GL_TEXTURE_2D_ARRAY : mt->target;
> +      const unsigned layer = surface->layer / MAX2(mt->num_samples, 1);
> +      const unsigned max_level = surface->level + mt->last_level + 1;
> +
> +      brw->vtbl.emit_texture_surface_state(brw, mt, target,
> +                                           layer, layer + depth,
> +                                           surface->level, max_level,
> +                                           surface->brw_surfaceformat,
> +                                           SWIZZLE_XYZW,
> +                                           &wm_surf_offset_texture,
> +                                           -1, false, false);
> +   }
> +
> +   return gen6_blorp_emit_binding_table(brw,
> +                                        wm_surf_offset_renderbuffer,
> +                                        wm_surf_offset_texture);
> +}
> +
> +/**
> + * \copydoc gen6_blorp_exec()
> + */
> +void
> +gen8_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
> +{
> +   brw_blorp_prog_data *prog_data = NULL;
> +   uint32_t wm_bind_bo_offset = 0;
> +
> +   uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
> +
> +   gen8_upload_state_base_address(brw);
> +   gen7_blorp_emit_cc_viewport(brw);
> +   gen7_l3_state.emit(brw);
> +
> +   gen7_blorp_emit_urb_config(brw);
> +
> +   const uint32_t cc_blend_state_offset =
> +      gen8_blorp_emit_blend_state(brw, params);
> +   gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset);
> +
> +   const uint32_t cc_state_offset = gen6_blorp_emit_cc_state(brw);
> +   gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset);
> +
> +   gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_VS);
> +   gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_HS);
> +   gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_DS);
> +   gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_GS);
> +
> +   const uint32_t wm_push_const_offset =
> +      gen6_blorp_emit_wm_constants(brw, params);
> +   gen8_blorp_emit_constant_ps(brw, wm_push_const_offset);
> +   wm_bind_bo_offset = gen8_blorp_emit_surface_states(brw, params);
> +
> +   gen8_blorp_emit_disable_binding_table(brw,
> +                                         _3DSTATE_BINDING_TABLE_POINTERS_VS);
> +   gen8_blorp_emit_disable_binding_table(brw,
> +                                         _3DSTATE_BINDING_TABLE_POINTERS_HS);
> +   gen8_blorp_emit_disable_binding_table(brw,
> +                                         _3DSTATE_BINDING_TABLE_POINTERS_DS);
> +   gen8_blorp_emit_disable_binding_table(brw,
> +                                         _3DSTATE_BINDING_TABLE_POINTERS_GS);
> +
> +   gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset);
> +   const uint32_t sampler_offset =
> +      gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
> +   gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset);
> +
> +   gen8_emit_3dstate_multisample(brw, params->dst.num_samples);
> +   gen6_emit_3dstate_sample_mask(brw,
> +                                 params->dst.num_samples > 1 ?
> +                                    (1 << params->dst.num_samples) - 1 : 1);
> +
> +   gen8_disable_stages.emit(brw);
> +   gen8_blorp_emit_vs_disable(brw);
> +   gen8_blorp_emit_hs_disable(brw);
> +   gen7_blorp_emit_te_disable(brw);
> +   gen8_blorp_emit_ds_disable(brw);
> +   gen8_blorp_emit_gs_disable(brw);
> +
> +   gen8_blorp_emit_streamout_disable(brw);
> +   gen6_blorp_emit_clip_disable(brw);
> +   gen8_blorp_emit_raster_state(brw);
> +   gen8_blorp_emit_sbe_state(brw, params);
> +   gen8_blorp_emit_sf_config(brw);
> +
> +   gen8_blorp_emit_ps_blend(brw);
> +   gen8_blorp_emit_ps_extra(brw, params, prog_data);
> +
> +   gen8_blorp_emit_ps_config(brw, params, prog_offset, prog_data);
> +
> +   gen8_blorp_emit_depth_stencil_state(brw, params);
> +   gen8_blorp_emit_wm_state(brw);
> +
> +   gen8_blorp_emit_depth_disable(brw);
> +   gen7_blorp_emit_clear_params(brw, params);
> +   gen6_blorp_emit_drawing_rectangle(brw, params);
> +   gen8_blorp_emit_vf_topology(brw);
> +   gen8_blorp_emit_vf_sys_gen_vals_state(brw);
> +   gen6_blorp_emit_vertices(brw, params);
> +   gen8_blorp_emit_vf_instancing_state(brw, 2);
> +   gen8_blorp_emit_vf_state(brw);
> +   gen7_blorp_emit_primitive(brw, params);
> +}
> 

I think we need to call write_pma_stall_bits(brw, 0) here.  The GPU gets
very upset if you leave the PMA stall bits configured incorrectly - and
they might be enabled prior to a BLORP operation.

Disabling it is always safe.  It's also the right thing to do currently,
since BLORP doesn't use the depth pipeline.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160418/3848ddd2/attachment-0001.sig>


More information about the mesa-dev mailing list