[Mesa-dev] [PATCH 3/4] i965: Use genxml for emitting PIPE_CONTROL.
Kenneth Graunke
kenneth at whitecape.org
Fri Nov 2 03:04:20 UTC 2018
While this does add a bunch of boilerplate, it also protects us against
the hardware moving bits, or changing their meaning. For something as
finicky as PIPE_CONTROL, the extra safety seems worth it.
We turn PIPE_CONTROL_* into a bitfield of arbitrary flags, and then
pack them appropriately.
---
src/mesa/drivers/dri/i965/Makefile.sources | 9 +
src/mesa/drivers/dri/i965/brw_context.h | 4 +
src/mesa/drivers/dri/i965/brw_pipe_control.c | 240 +++--------------
src/mesa/drivers/dri/i965/brw_pipe_control.h | 58 +++--
src/mesa/drivers/dri/i965/brw_state.h | 31 +++
src/mesa/drivers/dri/i965/genX_pipe_control.c | 243 ++++++++++++++++++
src/mesa/drivers/dri/i965/meson.build | 4 +-
7 files changed, 359 insertions(+), 230 deletions(-)
create mode 100644 src/mesa/drivers/dri/i965/genX_pipe_control.c
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 63fa7b886f2..e4eb0339e09 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -127,46 +127,55 @@ intel_tiled_memcpy_dep_FILES = \
i965_gen4_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen45_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen5_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen6_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen7_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen75_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen8_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen9_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen10_FILES = \
genX_blorp_exec.c \
genX_boilerplate.h \
+ genX_pipe_control.c \
genX_state_upload.c
i965_gen11_FILES = \
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7fd15669eb9..fe75425854c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -752,6 +752,10 @@ struct brw_context
struct brw_bo *bo,
uint32_t offset_in_bytes,
uint32_t report_id);
+
+ void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
} vtbl;
struct brw_bufmgr *bufmgr;
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 4d76e5dc9b7..cf9cc35875f 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -23,200 +23,10 @@
#include "brw_context.h"
#include "brw_defines.h"
+#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
-/**
- * According to the latest documentation, any PIPE_CONTROL with the
- * "Command Streamer Stall" bit set must also have another bit set,
- * with five different options:
- *
- * - Render Target Cache Flush
- * - Depth Cache Flush
- * - Stall at Pixel Scoreboard
- * - Post-Sync Operation
- * - Depth Stall
- * - DC Flush Enable
- *
- * I chose "Stall at Pixel Scoreboard" since we've used it effectively
- * in the past, but the choice is fairly arbitrary.
- */
-static void
-gen8_add_cs_stall_workaround_bits(uint32_t *flags)
-{
- uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP |
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DATA_CACHE_FLUSH;
-
- /* If we're doing a CS stall, and don't already have one of the
- * workaround bits set, add "Stall at Pixel Scoreboard."
- */
- if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
- *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
-}
-
-/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
- *
- * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
- * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
- *
- * Note that the kernel does CS stalls between batches, so we only need
- * to count them within a batch.
- */
-static uint32_t
-gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
-{
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->gen == 7 && !devinfo->is_haswell) {
- if (flags & PIPE_CONTROL_CS_STALL) {
- /* If we're doing a CS stall, reset the counter and carry on. */
- brw->pipe_controls_since_last_cs_stall = 0;
- return 0;
- }
-
- /* If this is the fourth pipe control without a CS stall, do one now. */
- if (++brw->pipe_controls_since_last_cs_stall == 4) {
- brw->pipe_controls_since_last_cs_stall = 0;
- return PIPE_CONTROL_CS_STALL;
- }
- }
- return 0;
-}
-
-/* #1130 from gen10 workarounds page in h/w specs:
- * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
- * not enabled in same PIPE CONTROL and Enable Pixel score board stall if
- * Render target cache flush is enabled."
- *
- * Applicable to CNL B0 and C0 steppings only.
- */
-static void
-gen10_add_rcpfe_workaround_bits(uint32_t *flags)
-{
- if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
- *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
- } else if (*flags &
- (PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP)) {
- *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
- }
-}
-
-static void
-brw_emit_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset, uint64_t imm)
-{
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->gen >= 8) {
- if (devinfo->gen == 8)
- gen8_add_cs_stall_workaround_bits(&flags);
-
- if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
- if (devinfo->gen == 9) {
- /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
- * lists several workarounds:
- *
- * "Project: SKL, KBL, BXT
- *
- * If the VF Cache Invalidation Enable is set to a 1 in a
- * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
- * sets to 0, with the VF Cache Invalidation Enable set to 0
- * needs to be sent prior to the PIPE_CONTROL with VF Cache
- * Invalidation Enable set to a 1."
- */
- brw_emit_pipe_control_flush(brw, 0);
- }
-
- if (devinfo->gen >= 9) {
- /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
- *
- * "Project: BDW+
- *
- * When VF Cache Invalidate is set “Post Sync Operation” must
- * be enabled to “Write Immediate Data” or “Write PS Depth
- * Count” or “Write Timestamp”."
- *
- * If there's a BO, we're already doing some kind of write.
- * If not, add a write to the workaround BO.
- *
- * XXX: This causes GPU hangs on Broadwell, so restrict it to
- * Gen9+ for now...see this bug for more information:
- * https://bugs.freedesktop.org/show_bug.cgi?id=103787
- */
- if (!bo) {
- flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
- bo = brw->workaround_bo;
- }
- }
- }
-
- if (devinfo->gen == 10)
- gen10_add_rcpfe_workaround_bits(&flags);
-
- BEGIN_BATCH(6);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
- OUT_BATCH(flags);
- if (bo) {
- OUT_RELOC64(bo, RELOC_WRITE, offset);
- } else {
- OUT_BATCH(0);
- OUT_BATCH(0);
- }
- OUT_BATCH(imm);
- OUT_BATCH(imm >> 32);
- ADVANCE_BATCH();
- } else if (devinfo->gen >= 6) {
- if (devinfo->gen == 6 &&
- (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
- /* Hardware workaround: SNB B-Spec says:
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
- * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
- * required.
- */
- brw_emit_post_sync_nonzero_flush(brw);
- }
-
- flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
-
- /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
- * on later platforms. We always use PPGTT on Gen7+.
- */
- unsigned gen6_gtt = devinfo->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
-
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(flags);
- if (bo) {
- OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset);
- } else {
- OUT_BATCH(0);
- }
- OUT_BATCH(imm);
- OUT_BATCH(imm >> 32);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
- if (bo) {
- OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
- } else {
- OUT_BATCH(0);
- }
- OUT_BATCH(imm);
- OUT_BATCH(imm >> 32);
- ADVANCE_BATCH();
- }
-}
-
/**
* Emit a PIPE_CONTROL with various flushing flags.
*
@@ -246,7 +56,7 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
}
- brw_emit_pipe_control(brw, flags, NULL, 0, 0);
+ brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0);
}
/**
@@ -262,7 +72,7 @@ brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm)
{
- brw_emit_pipe_control(brw, flags, bo, offset, imm);
+ brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm);
}
/**
@@ -357,14 +167,14 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
void
gen10_emit_isp_disable(struct brw_context *brw)
{
- brw_emit_pipe_control(brw,
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_CS_STALL,
- NULL, 0, 0);
- brw_emit_pipe_control(brw,
- PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
- PIPE_CONTROL_CS_STALL,
- NULL, 0, 0);
+ brw->vtbl.emit_raw_pipe_control(brw,
+ PIPE_CONTROL_STALL_AT_SCOREBOARD |
+ PIPE_CONTROL_CS_STALL,
+ NULL, 0, 0);
+ brw->vtbl.emit_raw_pipe_control(brw,
+ PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
+ PIPE_CONTROL_CS_STALL,
+ NULL, 0, 0);
brw->vs.base.push_constants_dirty = true;
brw->tcs.base.push_constants_dirty = true;
@@ -561,6 +371,34 @@ int
brw_init_pipe_control(struct brw_context *brw,
const struct gen_device_info *devinfo)
{
+ switch (devinfo->gen) {
+ case 10:
+ brw->vtbl.emit_raw_pipe_control = gen10_emit_raw_pipe_control;
+ break;
+ case 9:
+ brw->vtbl.emit_raw_pipe_control = gen9_emit_raw_pipe_control;
+ break;
+ case 8:
+ brw->vtbl.emit_raw_pipe_control = gen8_emit_raw_pipe_control;
+ break;
+ case 7:
+ brw->vtbl.emit_raw_pipe_control =
+ devinfo->is_haswell ? gen75_emit_raw_pipe_control
+ : gen7_emit_raw_pipe_control;
+ break;
+ case 6:
+ brw->vtbl.emit_raw_pipe_control = gen6_emit_raw_pipe_control;
+ break;
+ case 5:
+ brw->vtbl.emit_raw_pipe_control = gen5_emit_raw_pipe_control;
+ break;
+ case 4:
+ brw->vtbl.emit_raw_pipe_control =
+ devinfo->is_g4x ? gen45_emit_raw_pipe_control
+ : gen4_emit_raw_pipe_control;
+ break;
+ }
+
if (devinfo->gen < 6)
return 0;
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.h b/src/mesa/drivers/dri/i965/brw_pipe_control.h
index 69b1c7c31e6..e213f43a4f7 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.h
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.h
@@ -32,34 +32,38 @@ struct brw_bo;
*
* PIPE_CONTROL operation, a combination MI_FLUSH and register write with
* additional flushing control.
+ *
+ * The bits here are not the actual hardware values. The actual values
+ * shift around a bit per-generation, so we just have flags for each
+ * potential operation, and use genxml to encode the actual packet.
*/
-#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24))
-#define PIPE_CONTROL_LRI_WRITE_IMMEDIATE (1 << 23) /* Gen7+ */
-#define PIPE_CONTROL_CS_STALL (1 << 20)
-#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
-#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18)
-#define PIPE_CONTROL_SYNC_GFDT (1 << 17)
-#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
-#define PIPE_CONTROL_NO_WRITE (0 << 14)
-#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
-#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
-#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
-#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
-#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
-#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
-#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */
-#define PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE (1 << 9)
-#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
-#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */
-/* GT */
-#define PIPE_CONTROL_DATA_CACHE_FLUSH (1 << 5)
-#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
-#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
-#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
-#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
-#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
-#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
-#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+enum pipe_control_flags
+{
+ PIPE_CONTROL_FLUSH_LLC = (1 << 1),
+ PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2),
+ PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3),
+ PIPE_CONTROL_CS_STALL = (1 << 4),
+ PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5),
+ PIPE_CONTROL_SYNC_GFDT = (1 << 6),
+ PIPE_CONTROL_TLB_INVALIDATE = (1 << 7),
+ PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8),
+ PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9),
+ PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10),
+ PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11),
+ PIPE_CONTROL_DEPTH_STALL = (1 << 12),
+ PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13),
+ PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14),
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15),
+ PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
+ PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17),
+ PIPE_CONTROL_FLUSH_ENABLE = (1 << 18),
+ PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19),
+ PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20),
+ PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21),
+ PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22),
+ PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23),
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24),
+};
#define PIPE_CONTROL_CACHE_FLUSH_BITS \
(PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index f6acf81b899..b62890729fa 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -95,6 +95,37 @@ extern const struct brw_tracked_state gen7_urb;
extern const struct brw_tracked_state gen8_pma_fix;
extern const struct brw_tracked_state brw_cs_work_groups_surface;
+void gen4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen10_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+void gen11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset,
+ uint64_t imm);
+
static inline bool
brw_state_dirty(const struct brw_context *brw,
GLuint mesa_flags, uint64_t brw_flags)
diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c b/src/mesa/drivers/dri/i965/genX_pipe_control.c
new file mode 100644
index 00000000000..8eb37444253
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/genX_pipe_control.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "genX_boilerplate.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+/**
+ * According to the latest documentation, any PIPE_CONTROL with the
+ * "Command Streamer Stall" bit set must also have another bit set,
+ * with six different options:
+ *
+ * - Render Target Cache Flush
+ * - Depth Cache Flush
+ * - Stall at Pixel Scoreboard
+ * - Post-Sync Operation
+ * - Depth Stall
+ * - DC Flush Enable
+ *
+ * I chose "Stall at Pixel Scoreboard" since we've used it effectively
+ * in the past, but the choice is fairly arbitrary.
+ */
+static void
+gen8_add_cs_stall_workaround_bits(uint32_t *flags)
+{
+ uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_WRITE_IMMEDIATE |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_DATA_CACHE_FLUSH;
+
+ /* If we're doing a CS stall, and don't already have one of the
+ * workaround bits set, add "Stall at Pixel Scoreboard."
+ */
+ if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
+ *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+}
+
+/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
+ *
+ * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
+ * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
+ *
+ * Note that the kernel does CS stalls between batches, so we only need
+ * to count them within a batch.
+ */
+static uint32_t
+gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
+{
+ if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
+ if (flags & PIPE_CONTROL_CS_STALL) {
+ /* If we're doing a CS stall, reset the counter and carry on. */
+ brw->pipe_controls_since_last_cs_stall = 0;
+ return 0;
+ }
+
+ /* If this is the fourth pipe control without a CS stall, do one now. */
+ if (++brw->pipe_controls_since_last_cs_stall == 4) {
+ brw->pipe_controls_since_last_cs_stall = 0;
+ return PIPE_CONTROL_CS_STALL;
+ }
+ }
+ return 0;
+}
+
+/* #1130 from gen10 workarounds page in h/w specs:
+ * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
+ * not enabled in same PIPE CONTROL and Enable Pixel score board stall if
+ * Render target cache flush is enabled."
+ *
+ * Applicable to CNL B0 and C0 steppings only.
+ */
+static void
+gen10_add_rcpfe_workaround_bits(uint32_t *flags)
+{
+ if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
+ *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ } else if (*flags &
+ (PIPE_CONTROL_WRITE_IMMEDIATE |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP)) {
+ *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
+ }
+}
+
+static unsigned
+flags_to_post_sync_op(uint32_t flags)
+{
+ flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP;
+
+ assert(util_bitcount(flags) <= 1);
+
+ if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
+ return WriteImmediateData;
+
+ if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
+ return WritePSDepthCount;
+
+ if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
+ return WriteTimestamp;
+
+ return 0;
+}
+
+void
+genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
+ struct brw_bo *bo, uint32_t offset, uint64_t imm)
+{
+ if (GEN_GEN >= 8) {
+ if (GEN_GEN == 8)
+ gen8_add_cs_stall_workaround_bits(&flags);
+
+ if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
+ if (GEN_GEN == 9) {
+ /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
+ * lists several workarounds:
+ *
+ * "Project: SKL, KBL, BXT
+ *
+ * If the VF Cache Invalidation Enable is set to a 1 in a
+ * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
+ * sets to 0, with the VF Cache Invalidation Enable set to 0
+ * needs to be sent prior to the PIPE_CONTROL with VF Cache
+ * Invalidation Enable set to a 1."
+ */
+ brw_emit_pipe_control_flush(brw, 0);
+ }
+
+ if (GEN_GEN >= 9) {
+ /* The PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
+ *
+ * "Project: BDW+
+ *
+ * When VF Cache Invalidate is set “Post Sync Operation” must
+ * be enabled to “Write Immediate Data” or “Write PS Depth
+ * Count” or “Write Timestamp”."
+ *
+ * If there's a BO, we're already doing some kind of write.
+ * If not, add a write to the workaround BO.
+ *
+ * XXX: This causes GPU hangs on Broadwell, so restrict it to
+ * Gen9+ for now...see this bug for more information:
+ * https://bugs.freedesktop.org/show_bug.cgi?id=103787
+ */
+ if (!bo) {
+ flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
+ bo = brw->workaround_bo;
+ }
+ }
+ }
+
+ if (GEN_GEN == 10)
+ gen10_add_rcpfe_workaround_bits(&flags);
+ } else if (GEN_GEN >= 6) {
+ if (GEN_GEN == 6 &&
+ (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
+ /* Hardware workaround: SNB B-Spec says:
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
+ * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
+ * required.
+ */
+ brw_emit_post_sync_nonzero_flush(brw);
+ }
+
+ flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+ }
+
+ brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
+ #if GEN_GEN >= 9
+ pc.FlushLLC = 0;
+ #endif
+ #if GEN_GEN >= 7
+ pc.LRIPostSyncOperation = NoLRIOperation;
+ pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
+ pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
+ #endif
+ #if GEN_GEN >= 6
+ pc.StoreDataIndex = 0;
+ pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
+ pc.GlobalSnapshotCountReset =
+ flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
+ pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
+ pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
+ pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ pc.RenderTargetCacheFlushEnable =
+ flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
+ pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ pc.StateCacheInvalidationEnable =
+ flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ pc.ConstantCacheInvalidationEnable =
+ flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ #else
+ pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
+ #endif
+ pc.PostSyncOperation = flags_to_post_sync_op(flags);
+ pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
+ pc.InstructionCacheInvalidateEnable =
+ flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
+ pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
+ #if GEN_GEN >= 5 || GEN_IS_G4X
+ pc.IndirectStatePointersDisable =
+ flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
+ #endif
+ #if GEN_GEN >= 6
+ pc.TextureCacheInvalidationEnable =
+ flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ #elif GEN_GEN == 5 || GEN_IS_G4X
+ pc.TextureCacheFlushEnable =
+ flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ #endif
+ pc.Address = ggtt_bo(bo, offset);
+ if (GEN_GEN < 7 && bo)
+ pc.DestinationAddressType = DAT_GGTT;
+ pc.ImmediateData = imm;
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build
index 02f5f3073f7..e7c890785e2 100644
--- a/src/mesa/drivers/dri/i965/meson.build
+++ b/src/mesa/drivers/dri/i965/meson.build
@@ -147,8 +147,8 @@ i965_gen_libs = []
foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110']
i965_gen_libs += static_library(
'i965_gen@0@'.format(v),
- ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_state_upload.c',
- gen_xml_pack],
+ ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c',
+ 'genX_state_upload.c', gen_xml_pack],
include_directories : [inc_common, inc_intel, inc_dri_common],
c_args : [
c_vis_args, no_override_init_args, c_sse2_args,
--
2.19.1
More information about the mesa-dev
mailing list