[Mesa-dev] [PATCH 31/32] i965/blorp: Use genxml for gen8-9 state setup

Jason Ekstrand jason at jlekstrand.net
Thu Aug 11 21:15:28 UTC 2016


---
 src/mesa/drivers/dri/i965/Makefile.am       |  10 +-
 src/mesa/drivers/dri/i965/Makefile.sources  |   7 +-
 src/mesa/drivers/dri/i965/blorp.c           |   4 +-
 src/mesa/drivers/dri/i965/blorp_priv.h      |   3 +
 src/mesa/drivers/dri/i965/gen8_blorp.c      | 578 ----------------------------
 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 157 +++++++-
 6 files changed, 172 insertions(+), 587 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/gen8_blorp.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 31477ef..30201b2 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -51,7 +51,9 @@ brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compil
 I965_PERGEN_LIBS = \
 	libi965_gen6.la \
 	libi965_gen7.la \
-	libi965_gen75.la
+	libi965_gen75.la \
+	libi965_gen8.la \
+	libi965_gen9.la
 
 libi965_gen6_la_SOURCES = $(i965_gen6_FILES)
 libi965_gen6_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=60
@@ -62,6 +64,12 @@ libi965_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70
 libi965_gen75_la_SOURCES = $(i965_gen75_FILES)
 libi965_gen75_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=75
 
+libi965_gen8_la_SOURCES = $(i965_gen8_FILES)
+libi965_gen8_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=80
+
+libi965_gen9_la_SOURCES = $(i965_gen9_FILES)
+libi965_gen9_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=90
+
 noinst_LTLIBRARIES = \
 	libi965_dri.la \
 	libi965_compiler.la \
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index a7f8c40..975f478 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -206,7 +206,6 @@ i965_FILES = \
 	gen7_wm_state.c \
 	gen7_wm_surface_state.c \
 	gen8_blend_state.c \
-	gen8_blorp.c \
 	gen8_depth_state.c \
 	gen8_disable.c \
 	gen8_draw_upload.c \
@@ -267,3 +266,9 @@ i965_gen7_FILES = \
 
 i965_gen75_FILES = \
 	genX_blorp_exec.c
+
+i965_gen8_FILES = \
+	genX_blorp_exec.c
+
+i965_gen9_FILES = \
+	genX_blorp_exec.c
diff --git a/src/mesa/drivers/dri/i965/blorp.c b/src/mesa/drivers/dri/i965/blorp.c
index 87cf2c9..3100615 100644
--- a/src/mesa/drivers/dri/i965/blorp.c
+++ b/src/mesa/drivers/dri/i965/blorp.c
@@ -321,9 +321,11 @@ retry:
          gen7_blorp_exec(brw, params);
       break;
    case 8:
-   case 9:
       gen8_blorp_exec(brw, params);
       break;
+   case 9:
+      gen9_blorp_exec(brw, params);
+      break;
    default:
       /* BLORP is not supported before Gen6. */
       unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/blorp_priv.h b/src/mesa/drivers/dri/i965/blorp_priv.h
index ce6aaa7..3ca1c223 100644
--- a/src/mesa/drivers/dri/i965/blorp_priv.h
+++ b/src/mesa/drivers/dri/i965/blorp_priv.h
@@ -203,6 +203,9 @@ gen75_blorp_exec(struct brw_context *brw,
 void
 gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params);
 
+void
+gen9_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params);
+
 struct brw_blorp_blit_prog_key
 {
    /* Number of samples per pixel that have been configured in the surface
diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.c b/src/mesa/drivers/dri/i965/gen8_blorp.c
deleted file mode 100644
index 2223b23..0000000
--- a/src/mesa/drivers/dri/i965/gen8_blorp.c
+++ /dev/null
@@ -1,578 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <assert.h>
-
-#include "intel_batchbuffer.h"
-#include "intel_fbo.h"
-#include "intel_mipmap_tree.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-
-#include "blorp_priv.h"
-
-static uint32_t
-gen8_blorp_emit_blend_state(struct brw_context *brw,
-                            const struct brw_blorp_params *params)
-{
-   uint32_t blend_state_offset;
-
-   assume(params->num_draw_buffers);
-
-   const unsigned size = 4 + 8 * params->num_draw_buffers;
-   uint32_t *blend = (uint32_t *)brw_state_batch(brw, AUB_TRACE_BLEND_STATE,
-                                                 size, 64,
-                                                 &blend_state_offset);
-   memset(blend, 0, size);
-
-   for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
-      if (params->color_write_disable[0])
-         blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_RED;
-      if (params->color_write_disable[1])
-         blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_GREEN;
-      if (params->color_write_disable[2])
-         blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_BLUE;
-      if (params->color_write_disable[3])
-         blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_ALPHA;
-
-      blend[1 + 2 * i + 1] = GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE |
-                             GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE |
-                             GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT;
-   }
-
-   return blend_state_offset;
-}
-
-/* Hardware seems to try to fetch the constants even though the corresponding
- * stage gets disabled. Therefore make sure the settings for the constant
- * buffer are valid.
- */
-static void
-gen8_blorp_disable_constant_state(struct brw_context *brw,
-                                       unsigned opcode)
-{
-   BEGIN_BATCH(11);
-   OUT_BATCH(opcode << 16 | (11 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-/* 3DSTATE_VS
- *
- * Disable vertex shader.
- */
-static void
-gen8_blorp_emit_vs_disable(struct brw_context *brw)
-{
-   BEGIN_BATCH(9);
-   OUT_BATCH(_3DSTATE_VS << 16 | (9 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-/* 3DSTATE_HS
- *
- * Disable the hull shader.
- */
-static void
-gen8_blorp_emit_hs_disable(struct brw_context *brw)
-{
-   BEGIN_BATCH(9);
-   OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-/* 3DSTATE_DS
- *
- * Disable the domain shader.
- */
-static void
-gen8_blorp_emit_ds_disable(struct brw_context *brw)
-{
-   const int ds_pkt_len = brw->gen >= 9 ? 11 : 9;
-   BEGIN_BATCH(ds_pkt_len);
-   OUT_BATCH(_3DSTATE_DS << 16 | (ds_pkt_len - 2));
-   for (int i = 0; i < ds_pkt_len - 1; i++)
-      OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-/* 3DSTATE_GS
- *
- * Disable the geometry shader.
- */
-static void
-gen8_blorp_emit_gs_disable(struct brw_context *brw)
-{
-   BEGIN_BATCH(10);
-   OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-/* 3DSTATE_STREAMOUT
- *
- * Disable streamout.
- */
-static void
-gen8_blorp_emit_streamout_disable(struct brw_context *brw)
-{
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (5 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_raster_state(struct brw_context *brw)
-{
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_RASTER << 16 | (5 - 2));
-   OUT_BATCH(GEN8_RASTER_CULL_NONE);
-   OUT_BATCH_F(0);
-   OUT_BATCH_F(0);
-   OUT_BATCH_F(0);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_sbe_state(struct brw_context *brw,
-                          const struct brw_blorp_params *params)
-{
-   const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
-   const unsigned urb_read_length =
-      brw_blorp_get_urb_length(params->wm_prog_data);
-
-   /* 3DSTATE_SBE */
-   {
-      const unsigned sbe_cmd_length = brw->gen == 8 ? 4 : 6;
-      BEGIN_BATCH(sbe_cmd_length);
-      OUT_BATCH(_3DSTATE_SBE << 16 | (sbe_cmd_length - 2));
-
-      /* There is no need for swizzling (GEN7_SBE_SWIZZLE_ENABLE). All the
-       * vertex data coming from vertex fetcher is taken as unmodified
-       * (i.e., passed through). Vertex shader state is disabled and vertex
-       * fetcher builds complete vertex entries including VUE header.
-       * This is for unknown reason really needed to be disabled when more
-       * than one vec4 worth of vertex attributes are needed.
-       */
-      OUT_BATCH(num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT |
-                urb_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
-                BRW_SF_URB_ENTRY_READ_OFFSET <<
-                   GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT |
-                GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH |
-                GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET);
-      OUT_BATCH(0);
-      OUT_BATCH(params->wm_prog_data->flat_inputs);
-      if (sbe_cmd_length >= 6) {
-         /* Fragment coordinates are always enabled. */
-         uint32_t dw4 = (GEN9_SBE_ACTIVE_COMPONENT_XYZW << (0 << 1));
-
-         for (unsigned i = 0; i < num_varyings; ++i) {
-            dw4 |= (GEN9_SBE_ACTIVE_COMPONENT_XYZW << ((i + 1) << 1));
-         }
-
-         OUT_BATCH(dw4);
-         OUT_BATCH(0);
-      }
-      ADVANCE_BATCH();
-   }
-
-   {
-      BEGIN_BATCH(11);
-      OUT_BATCH(_3DSTATE_SBE_SWIZ << 16 | (11 - 2));
-
-      /* Output DWords 1 through 8: */
-      for (int i = 0; i < 8; i++) {
-         OUT_BATCH(0);
-      }
-
-      OUT_BATCH(0); /* wrapshortest enables 0-7 */
-      OUT_BATCH(0); /* wrapshortest enables 8-15 */
-      ADVANCE_BATCH();
-   }
-}
-
-static void
-gen8_blorp_emit_sf_config(struct brw_context *brw)
-{
-   /* See gen6_blorp_emit_sf_config() */
-   BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_SF << 16 | (4 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(GEN6_SF_LINE_AA_MODE_TRUE);
-   ADVANCE_BATCH();
-}
-
-/**
- * Disable thread dispatch (dw5.19) and enable the HiZ op.
- */
-static void
-gen8_blorp_emit_wm_state(struct brw_context *brw)
-{
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2));
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-/**
- * 3DSTATE_PS
- *
- * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
- * that, thread dispatch info must still be specified.
- *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
- *       valid range for this field is [0x3, 0x2f].
- *     - A dispatch mode must be given; that is, at least one of the
- *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
- *       discovered through simulator error messages.
- */
-static void
-gen8_blorp_emit_ps_config(struct brw_context *brw,
-                          const struct brw_blorp_params *params)
-{
-   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
-   uint32_t dw3, dw5, dw6, dw7, ksp0, ksp2;
-
-   dw3 = dw5 = dw6 = dw7 = ksp0 = ksp2 = 0;
-   dw3 |= GEN7_PS_VECTOR_MASK_ENABLE;
-
-   if (params->src.bo) {
-      dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
-      dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */
-   } else {
-      dw3 |= 1 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* One surface */
-   }
-
-   dw7 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
-   dw7 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
-
-   if (params->wm_prog_data->dispatch_8)
-      dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
-   if (params->wm_prog_data->dispatch_16)
-      dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
-
-   ksp0 = params->wm_prog_kernel;
-   ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2;
-
-   /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
-    * it implicitly scales for different GT levels (which have some # of PSDs).
-    *
-    * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
-    */
-   if (brw->gen >= 9)
-      dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT;
-   else
-      dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT;
-
-   dw6 |= GEN7_PS_POSOFFSET_NONE;
-   dw6 |= params->fast_clear_op;
-
-   BEGIN_BATCH(12);
-   OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
-   OUT_BATCH(ksp0);
-   OUT_BATCH(0);
-   OUT_BATCH(dw3);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(dw6);
-   OUT_BATCH(dw7);
-   OUT_BATCH(0); /* kernel 1 pointer */
-   OUT_BATCH(0);
-   OUT_BATCH(ksp2);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_ps_blend(struct brw_context *brw)
-{
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_PS_BLEND << 16 | (2 - 2));
-   OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_ps_extra(struct brw_context *brw,
-                         const struct brw_blorp_params *params)
-{
-   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
-   uint32_t dw1 = 0;
-
-   dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
-
-   if (params->src.bo)
-      dw1 |= GEN8_PSX_KILL_ENABLE;
-
-   if (params->wm_prog_data->num_varying_inputs)
-      dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
-
-   if (params->dst.surf.samples > 1 && prog_data &&
-       prog_data->persample_msaa_dispatch)
-      dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
-
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
-   OUT_BATCH(dw1);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_depth_disable(struct brw_context *brw)
-{
-   /* Skip repeated NULL depth/stencil emits (think 2D rendering). */
-   if (brw->no_depth_or_stencil)
-      return;
-
-   brw_emit_depth_stall_flushes(brw);
-
-   BEGIN_BATCH(8);
-   OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (8 - 2));
-   OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_vf_topology(struct brw_context *brw)
-{
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_VF_TOPOLOGY << 16 | (2 - 2));
-   OUT_BATCH(_3DPRIM_RECTLIST);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_vf_sys_gen_vals_state(struct brw_context *brw)
-{
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_vf_instancing_state(struct brw_context *brw,
-                                    const struct brw_blorp_params *params)
-{
-   const unsigned num_varyings =
-      params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
-   const unsigned num_elems = 2 + num_varyings;
-
-   for (unsigned i = 0; i < num_elems; ++i) {
-      BEGIN_BATCH(3);
-      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
-      OUT_BATCH(i);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-}
-
-static void
-gen8_blorp_emit_vf_state(struct brw_context *brw)
-{
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_VF << 16 | (2 - 2));
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-static void
-gen8_blorp_emit_depth_stencil_state(struct brw_context *brw,
-                                    const struct brw_blorp_params *params)
-{
-   const unsigned pkt_len = brw->gen >= 9 ? 4 : 3;
-
-   BEGIN_BATCH(pkt_len);
-   OUT_BATCH(_3DSTATE_WM_DEPTH_STENCIL << 16 | (pkt_len - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   if (pkt_len > 3) {
-      OUT_BATCH(0);
-   }
-   ADVANCE_BATCH();
-}
-
-static uint32_t
-gen8_blorp_emit_surface_states(struct brw_context *brw,
-                               const struct brw_blorp_params *params)
-{
-   uint32_t wm_surf_offset_renderbuffer;
-   uint32_t wm_surf_offset_texture = 0;
-
-   wm_surf_offset_renderbuffer =
-      brw_blorp_emit_surface_state(brw, &params->dst,
-                                   I915_GEM_DOMAIN_RENDER,
-                                   I915_GEM_DOMAIN_RENDER,
-                                   true /* is_render_target */);
-   if (params->src.bo) {
-      wm_surf_offset_texture =
-         brw_blorp_emit_surface_state(brw, &params->src,
-                                      I915_GEM_DOMAIN_SAMPLER, 0,
-                                      false /* is_render_target */);
-   }
-
-   return gen6_blorp_emit_binding_table(brw,
-                                        wm_surf_offset_renderbuffer,
-                                        wm_surf_offset_texture);
-}
-
-/**
- * \copydoc gen6_blorp_exec()
- */
-void
-gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params)
-{
-   uint32_t wm_bind_bo_offset = 0;
-
-   brw_upload_state_base_address(brw);
-
-   gen7_l3_state.emit(brw);
-
-   gen7_blorp_emit_urb_config(brw, params);
-
-   const uint32_t cc_blend_state_offset =
-      gen8_blorp_emit_blend_state(brw, params);
-   gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset);
-
-   const uint32_t cc_state_offset = gen6_blorp_emit_cc_state(brw);
-   gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset);
-
-   gen8_blorp_emit_depth_stencil_state(brw, params);
-
-   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_VS);
-   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_HS);
-   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_DS);
-   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_GS);
-   gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_PS);
-
-   wm_bind_bo_offset = gen8_blorp_emit_surface_states(brw, params);
-
-   gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset);
-
-   if (params->src.bo) {
-      const uint32_t sampler_offset =
-         gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
-      gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset);
-   }
-
-   gen8_emit_3dstate_multisample(brw, params->dst.surf.samples);
-   gen6_emit_3dstate_sample_mask(brw,
-                                 params->dst.surf.samples > 1 ?
-                                    (1 << params->dst.surf.samples) - 1 : 1);
-
-   gen8_disable_stages.emit(brw);
-   gen8_blorp_emit_vs_disable(brw);
-   gen8_blorp_emit_hs_disable(brw);
-   gen7_blorp_emit_te_disable(brw);
-   gen8_blorp_emit_ds_disable(brw);
-   gen8_blorp_emit_gs_disable(brw);
-
-   gen8_blorp_emit_streamout_disable(brw);
-   gen6_blorp_emit_clip_disable(brw);
-   gen8_blorp_emit_raster_state(brw);
-   gen8_blorp_emit_sbe_state(brw, params);
-   gen8_blorp_emit_sf_config(brw);
-
-   gen8_blorp_emit_ps_blend(brw);
-   gen8_blorp_emit_ps_extra(brw, params);
-
-   gen8_blorp_emit_ps_config(brw, params);
-
-   gen8_blorp_emit_wm_state(brw);
-
-   gen7_blorp_emit_cc_viewport(brw);
-
-   gen8_blorp_emit_depth_disable(brw);
-   gen7_blorp_emit_clear_params(brw, params);
-   gen6_blorp_emit_drawing_rectangle(brw, params);
-   gen8_blorp_emit_vf_topology(brw);
-   gen8_blorp_emit_vf_sys_gen_vals_state(brw);
-   gen6_blorp_emit_vertices(brw, params);
-   gen8_blorp_emit_vf_instancing_state(brw, params);
-   gen8_blorp_emit_vf_state(brw);
-   gen7_blorp_emit_primitive(brw, params);
-
-   if (brw->gen < 9)
-      gen8_write_pma_stall_bits(brw, 0);
-}
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 1a5c761..954c39d 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -237,8 +237,12 @@ blorp_emit_vertex_buffers(struct brw_context *brw,
 
    unsigned num_buffers = 1;
 
-#if GEN_GEN == 7
-   uint32_t mocs = 1 /* GEN7_MOCS_L3 */;
+#if GEN_GEN == 9
+   uint32_t mocs = (2 << 1); /* SKL_MOCS_WB */
+#elif GEN_GEN == 8
+   uint32_t mocs = 0x78; /* BDW_MOCS_WB */
+#elif GEN_GEN == 7
+   uint32_t mocs = 1; /* GEN7_MOCS_L3 */
 #else
    uint32_t mocs = 0;
 #endif
@@ -251,22 +255,30 @@ blorp_emit_vertex_buffers(struct brw_context *brw,
 #if GEN_GEN >= 7
    vb[0].AddressModifyEnable = true;
 #endif
+#if GEN_GEN >= 8
+   vb[0].BufferSize = size;
+#else
    vb[0].BufferAccessType = VERTEXDATA;
    vb[0].EndAddress = vb[0].BufferStartingAddress;
    vb[0].EndAddress.offset += size - 1;
+#endif
 
    if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) {
       blorp_emit_input_varying_data(brw, params,
                                     &vb[1].BufferStartingAddress, &size);
       vb[1].VertexBufferIndex = 1;
       vb[1].BufferPitch = 0;
-      vb[1].BufferAccessType = INSTANCEDATA;
       vb[1].VertexBufferMOCS = mocs;
 #if GEN_GEN >= 7
       vb[1].AddressModifyEnable = true;
 #endif
+#if GEN_GEN >= 8
+      vb[1].BufferSize = size;
+#else
+      vb[1].BufferAccessType = INSTANCEDATA;
       vb[1].EndAddress = vb[1].BufferStartingAddress;
-      vb[1].EndAddress.offset += size;
+      vb[1].EndAddress.offset += size - 1;
+#endif
       num_buffers++;
    }
 
@@ -372,6 +384,21 @@ blorp_emit_vertex_elements(struct brw_context *brw,
       GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &ve[i]);
       dw += GENX(VERTEX_ELEMENT_STATE_length);
    }
+
+#if GEN_GEN >= 8
+   blorp_emit(brw, GENX(3DSTATE_VF_SGVS), sgvs);
+
+   for (unsigned i = 0; i < num_elements; i++) {
+      blorp_emit(brw, GENX(3DSTATE_VF_INSTANCING), vf) {
+         vf.VertexElementIndex = i;
+         vf.InstancingEnable = false;
+      }
+   }
+
+   blorp_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), topo) {
+      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
+   }
+#endif
 }
 
 static void
@@ -380,7 +407,29 @@ blorp_emit_sf_config(struct brw_context *brw,
 {
    const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
 
-#if GEN_GEN >= 7
+#if GEN_GEN >= 8
+
+   blorp_emit(brw, GENX(3DSTATE_SF), sf);
+
+   blorp_emit(brw, GENX(3DSTATE_RASTER), raster) {
+      raster.CullMode = CULLMODE_NONE;
+   }
+
+   blorp_emit(brw, GENX(3DSTATE_SBE), sbe) {
+      sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
+      sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
+      sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
+      sbe.ForceVertexURBEntryReadLength = true;
+      sbe.ForceVertexURBEntryReadOffset = true;
+      sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
+
+#if GEN_GEN >= 9
+      for (unsigned i = 0; i < 32; i++)
+         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
+#endif
+   }
+
+#elif GEN_GEN >= 7
 
    blorp_emit(brw, GENX(3DSTATE_SF), sf) {
       sf.FrontFaceFillMode = FILL_MODE_SOLID;
@@ -435,7 +484,73 @@ blorp_emit_ps_config(struct brw_context *brw,
 {
    const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
 
-#if GEN_GEN >= 7
+#if GEN_GEN >= 8
+
+   blorp_emit(brw, GENX(3DSTATE_WM), wm);
+
+   blorp_emit(brw, GENX(3DSTATE_PS), ps) {
+      if (params->src.bo) {
+         ps.SamplerCount = 1; /* Up to 4 samplers */
+         ps.BindingTableEntryCount = 2;
+      } else {
+         ps.BindingTableEntryCount = 1;
+      }
+
+      ps.DispatchGRFStartRegisterForConstantSetupData0 =
+         prog_data->first_curbe_grf_0;
+      ps.DispatchGRFStartRegisterForConstantSetupData2 =
+         prog_data->first_curbe_grf_2;
+
+      ps._8PixelDispatchEnable = prog_data->dispatch_8;
+      ps._16PixelDispatchEnable = prog_data->dispatch_16;
+
+      ps.KernelStartPointer0 = params->wm_prog_kernel;
+      ps.KernelStartPointer2 =
+         params->wm_prog_kernel + prog_data->ksp_offset_2;
+
+      /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
+       * it implicitly scales for different GT levels (which have some # of
+       * PSDs).
+       *
+       * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
+       */
+      if (GEN_GEN >= 9)
+         ps.MaximumNumberofThreadsPerPSD = 64 - 1;
+      else
+         ps.MaximumNumberofThreadsPerPSD = 64 - 2;
+
+      switch (params->fast_clear_op) {
+#if GEN_GEN >= 9
+      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
+         ps.RenderTargetResolveType = RESOLVE_PARTIAL;
+         break;
+      case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */
+         ps.RenderTargetResolveType = RESOLVE_FULL;
+         break;
+#else
+      case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
+         ps.RenderTargetResolveEnable = true;
+         break;
+#endif
+      case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
+         ps.RenderTargetFastClearEnable = true;
+         break;
+      }
+   }
+
+   blorp_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
+      psx.PixelShaderValid = true;
+
+      if (params->src.bo)
+         psx.PixelShaderKillsPixel = true;
+
+      psx.AttributeEnable = prog_data->num_varying_inputs > 0;
+
+      if (prog_data && prog_data->persample_msaa_dispatch)
+         psx.PixelShaderIsPerSample = true;
+   }
+
+#elif GEN_GEN >= 7
 
    blorp_emit(brw, GENX(3DSTATE_WM), wm) {
       switch (params->hiz_op) {
@@ -670,6 +785,15 @@ blorp_emit_blend_state(struct brw_context *brw,
 #if GEN_GEN >= 7
    blorp_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
       sp.BlendStatePointer = offset;
+#if GEN_GEN >= 8
+      sp.BlendStatePointerValid = true;
+#endif
+   }
+#endif
+
+#if GEN_GEN >= 8
+   blorp_emit(brw, GENX(3DSTATE_PS_BLEND), ps_blend) {
+      ps_blend.HasWriteableRT = true;
    }
 #endif
 
@@ -688,6 +812,9 @@ blorp_emit_color_calc_state(struct brw_context *brw,
 #if GEN_GEN >= 7
    blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
       sp.ColorCalcStatePointer = offset;
+#if GEN_GEN >= 8
+      sp.ColorCalcStatePointerValid = true;
+#endif
    }
 #endif
 
@@ -698,6 +825,14 @@ static uint32_t
 blorp_emit_depth_stencil_state(struct brw_context *brw,
                                const struct brw_blorp_params *params)
 {
+#if GEN_GEN >= 8
+
+   /* On gen8+, DEPTH_STENCIL is an instruction; there is no state offset */
+   blorp_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
+   return 0;
+
+#else /* GEN_GEN <= 7 */
+
    /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
     *   - 7.5.3.1 Depth Buffer Clear
     *   - 7.5.3.2 Depth Buffer Resolve
@@ -725,6 +860,8 @@ blorp_emit_depth_stencil_state(struct brw_context *brw,
 #endif
 
    return offset;
+
+#endif /* GEN_GEN */
 }
 
 static void
@@ -855,6 +992,10 @@ genX(blorp_exec)(struct brw_context *brw,
 
    brw_upload_state_base_address(brw);
 
+#if GEN_GEN >= 8
+   gen7_l3_state.emit(brw);
+#endif
+
    blorp_emit_vertex_buffers(brw, params);
    blorp_emit_vertex_elements(brw, params);
 
@@ -902,7 +1043,11 @@ genX(blorp_exec)(struct brw_context *brw,
    if (params->src.bo)
       blorp_emit_sampler_state(brw, params);
 
+#if GEN_GEN >= 8
+   gen8_emit_3dstate_multisample(brw, params->dst.surf.samples);
+#else
    gen6_emit_3dstate_multisample(brw, params->dst.surf.samples);
+#endif
 
    blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {
       mask.SampleMask = (1 << params->dst.surf.samples) - 1;
-- 
2.5.0.400.gff86faf



More information about the mesa-dev mailing list