[Mesa-dev] [PATCH 3/4] i965: Use genxml for emitting PIPE_CONTROL.
Pohjolainen, Topi
topi.pohjolainen at gmail.com
Fri Feb 22 12:49:04 UTC 2019
On Thu, Nov 01, 2018 at 08:04:20PM -0700, Kenneth Graunke wrote:
> While this does add a bunch of boilerplate, it also protects us against
> the hardware moving bits, or changing their meaning. For something as
> finicky as PIPE_CONTROL, the extra safety seems worth it.
>
> We turn PIPE_CONTROL_* into a bitfield of arbitrary flags, and then
> pack them appropriately.
This is clearly better than before, in my opinion. A few suggestions below, but
patches 1-3 are:
Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> ---
> src/mesa/drivers/dri/i965/Makefile.sources | 9 +
> src/mesa/drivers/dri/i965/brw_context.h | 4 +
> src/mesa/drivers/dri/i965/brw_pipe_control.c | 240 +++--------------
> src/mesa/drivers/dri/i965/brw_pipe_control.h | 58 +++--
> src/mesa/drivers/dri/i965/brw_state.h | 31 +++
> src/mesa/drivers/dri/i965/genX_pipe_control.c | 243 ++++++++++++++++++
> src/mesa/drivers/dri/i965/meson.build | 4 +-
> 7 files changed, 359 insertions(+), 230 deletions(-)
> create mode 100644 src/mesa/drivers/dri/i965/genX_pipe_control.c
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index 63fa7b886f2..e4eb0339e09 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -127,46 +127,55 @@ intel_tiled_memcpy_dep_FILES = \
> i965_gen4_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen45_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen5_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen6_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen7_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen75_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen8_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen9_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen10_FILES = \
> genX_blorp_exec.c \
> genX_boilerplate.h \
> + genX_pipe_control.c \
> genX_state_upload.c
>
> i965_gen11_FILES = \
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 7fd15669eb9..fe75425854c 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -752,6 +752,10 @@ struct brw_context
> struct brw_bo *bo,
> uint32_t offset_in_bytes,
> uint32_t report_id);
> +
> + void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> } vtbl;
>
> struct brw_bufmgr *bufmgr;
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index 4d76e5dc9b7..cf9cc35875f 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -23,200 +23,10 @@
>
> #include "brw_context.h"
> #include "brw_defines.h"
> +#include "brw_state.h"
> #include "intel_batchbuffer.h"
> #include "intel_fbo.h"
>
> -/**
> - * According to the latest documentation, any PIPE_CONTROL with the
> - * "Command Streamer Stall" bit set must also have another bit set,
> - * with five different options:
> - *
> - * - Render Target Cache Flush
> - * - Depth Cache Flush
> - * - Stall at Pixel Scoreboard
> - * - Post-Sync Operation
> - * - Depth Stall
> - * - DC Flush Enable
> - *
> - * I chose "Stall at Pixel Scoreboard" since we've used it effectively
> - * in the past, but the choice is fairly arbitrary.
> - */
> -static void
> -gen8_add_cs_stall_workaround_bits(uint32_t *flags)
> -{
> - uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
> - PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> - PIPE_CONTROL_WRITE_IMMEDIATE |
> - PIPE_CONTROL_WRITE_DEPTH_COUNT |
> - PIPE_CONTROL_WRITE_TIMESTAMP |
> - PIPE_CONTROL_STALL_AT_SCOREBOARD |
> - PIPE_CONTROL_DEPTH_STALL |
> - PIPE_CONTROL_DATA_CACHE_FLUSH;
> -
> - /* If we're doing a CS stall, and don't already have one of the
> - * workaround bits set, add "Stall at Pixel Scoreboard."
> - */
> - if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
> - *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
> -}
> -
> -/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
> - *
> - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
> - * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
> - *
> - * Note that the kernel does CS stalls between batches, so we only need
> - * to count them within a batch.
> - */
> -static uint32_t
> -gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
> -{
> - const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -
> - if (devinfo->gen == 7 && !devinfo->is_haswell) {
> - if (flags & PIPE_CONTROL_CS_STALL) {
> - /* If we're doing a CS stall, reset the counter and carry on. */
> - brw->pipe_controls_since_last_cs_stall = 0;
> - return 0;
> - }
> -
> - /* If this is the fourth pipe control without a CS stall, do one now. */
> - if (++brw->pipe_controls_since_last_cs_stall == 4) {
> - brw->pipe_controls_since_last_cs_stall = 0;
> - return PIPE_CONTROL_CS_STALL;
> - }
> - }
> - return 0;
> -}
> -
> -/* #1130 from gen10 workarounds page in h/w specs:
> - * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
> - * not enabled in same PIPE CONTROL and Enable Pixel score board stall if
> - * Render target cache flush is enabled."
> - *
> - * Applicable to CNL B0 and C0 steppings only.
> - */
> -static void
> -gen10_add_rcpfe_workaround_bits(uint32_t *flags)
> -{
> - if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
> - *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
> - } else if (*flags &
> - (PIPE_CONTROL_WRITE_IMMEDIATE |
> - PIPE_CONTROL_WRITE_DEPTH_COUNT |
> - PIPE_CONTROL_WRITE_TIMESTAMP)) {
> - *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
> - }
> -}
> -
> -static void
> -brw_emit_pipe_control(struct brw_context *brw, uint32_t flags,
> - struct brw_bo *bo, uint32_t offset, uint64_t imm)
> -{
> - const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -
> - if (devinfo->gen >= 8) {
> - if (devinfo->gen == 8)
> - gen8_add_cs_stall_workaround_bits(&flags);
> -
> - if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
> - if (devinfo->gen == 9) {
> - /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
> - * lists several workarounds:
> - *
> - * "Project: SKL, KBL, BXT
> - *
> - * If the VF Cache Invalidation Enable is set to a 1 in a
> - * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
> - * sets to 0, with the VF Cache Invalidation Enable set to 0
> - * needs to be sent prior to the PIPE_CONTROL with VF Cache
> - * Invalidation Enable set to a 1."
> - */
> - brw_emit_pipe_control_flush(brw, 0);
> - }
> -
> - if (devinfo->gen >= 9) {
> - /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
> - *
> - * "Project: BDW+
> - *
> - * When VF Cache Invalidate is set “Post Sync Operation” must
> - * be enabled to “Write Immediate Data” or “Write PS Depth
> - * Count” or “Write Timestamp”."
> - *
> - * If there's a BO, we're already doing some kind of write.
> - * If not, add a write to the workaround BO.
> - *
> - * XXX: This causes GPU hangs on Broadwell, so restrict it to
> - * Gen9+ for now...see this bug for more information:
> - * https://bugs.freedesktop.org/show_bug.cgi?id=103787
> - */
> - if (!bo) {
> - flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
> - bo = brw->workaround_bo;
> - }
> - }
> - }
> -
> - if (devinfo->gen == 10)
> - gen10_add_rcpfe_workaround_bits(&flags);
> -
> - BEGIN_BATCH(6);
> - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> - OUT_BATCH(flags);
> - if (bo) {
> - OUT_RELOC64(bo, RELOC_WRITE, offset);
> - } else {
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - }
> - OUT_BATCH(imm);
> - OUT_BATCH(imm >> 32);
> - ADVANCE_BATCH();
> - } else if (devinfo->gen >= 6) {
> - if (devinfo->gen == 6 &&
> - (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
> - /* Hardware workaround: SNB B-Spec says:
> - *
> - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
> - * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
> - * required.
> - */
> - brw_emit_post_sync_nonzero_flush(brw);
> - }
> -
> - flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
> -
> - /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
> - * on later platforms. We always use PPGTT on Gen7+.
> - */
> - unsigned gen6_gtt = devinfo->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
> -
> - BEGIN_BATCH(5);
> - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
> - OUT_BATCH(flags);
> - if (bo) {
> - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset);
> - } else {
> - OUT_BATCH(0);
> - }
> - OUT_BATCH(imm);
> - OUT_BATCH(imm >> 32);
> - ADVANCE_BATCH();
> - } else {
> - BEGIN_BATCH(4);
> - OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
> - if (bo) {
> - OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
> - } else {
> - OUT_BATCH(0);
> - }
> - OUT_BATCH(imm);
> - OUT_BATCH(imm >> 32);
> - ADVANCE_BATCH();
> - }
> -}
> -
> /**
> * Emit a PIPE_CONTROL with various flushing flags.
> *
> @@ -246,7 +56,7 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
> flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
> }
>
> - brw_emit_pipe_control(brw, flags, NULL, 0, 0);
> + brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0);
> }
>
> /**
> @@ -262,7 +72,7 @@ brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
> struct brw_bo *bo, uint32_t offset,
> uint64_t imm)
> {
> - brw_emit_pipe_control(brw, flags, bo, offset, imm);
> + brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm);
> }
>
> /**
> @@ -357,14 +167,14 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
> void
> gen10_emit_isp_disable(struct brw_context *brw)
> {
> - brw_emit_pipe_control(brw,
> - PIPE_CONTROL_STALL_AT_SCOREBOARD |
> - PIPE_CONTROL_CS_STALL,
> - NULL, 0, 0);
> - brw_emit_pipe_control(brw,
> - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
> - PIPE_CONTROL_CS_STALL,
> - NULL, 0, 0);
> + brw->vtbl.emit_raw_pipe_control(brw,
> + PIPE_CONTROL_STALL_AT_SCOREBOARD |
> + PIPE_CONTROL_CS_STALL,
> + NULL, 0, 0);
> + brw->vtbl.emit_raw_pipe_control(brw,
> + PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
> + PIPE_CONTROL_CS_STALL,
> + NULL, 0, 0);
>
> brw->vs.base.push_constants_dirty = true;
> brw->tcs.base.push_constants_dirty = true;
> @@ -561,6 +371,34 @@ int
> brw_init_pipe_control(struct brw_context *brw,
> const struct gen_device_info *devinfo)
> {
> + switch (devinfo->gen) {
> + case 10:
> + brw->vtbl.emit_raw_pipe_control = gen10_emit_raw_pipe_control;
> + break;
> + case 9:
> + brw->vtbl.emit_raw_pipe_control = gen9_emit_raw_pipe_control;
> + break;
> + case 8:
> + brw->vtbl.emit_raw_pipe_control = gen8_emit_raw_pipe_control;
> + break;
> + case 7:
> + brw->vtbl.emit_raw_pipe_control =
> + devinfo->is_haswell ? gen75_emit_raw_pipe_control
> + : gen7_emit_raw_pipe_control;
> + break;
> + case 6:
> + brw->vtbl.emit_raw_pipe_control = gen6_emit_raw_pipe_control;
> + break;
> + case 5:
> + brw->vtbl.emit_raw_pipe_control = gen5_emit_raw_pipe_control;
> + break;
> + case 4:
> + brw->vtbl.emit_raw_pipe_control =
> + devinfo->is_g4x ? gen45_emit_raw_pipe_control
> + : gen4_emit_raw_pipe_control;
> + break;
> + }
> +
> if (devinfo->gen < 6)
> return 0;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.h b/src/mesa/drivers/dri/i965/brw_pipe_control.h
> index 69b1c7c31e6..e213f43a4f7 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.h
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.h
> @@ -32,34 +32,38 @@ struct brw_bo;
> *
> * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
> * additional flushing control.
> + *
> + * The bits here are not the actual hardware values. The actual values
> + * shift around a bit per-generation, so we just have flags for each
> + * potential operation, and use genxml to encode the actual packet.
> */
> -#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24))
> -#define PIPE_CONTROL_LRI_WRITE_IMMEDIATE (1 << 23) /* Gen7+ */
> -#define PIPE_CONTROL_CS_STALL (1 << 20)
> -#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
> -#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18)
> -#define PIPE_CONTROL_SYNC_GFDT (1 << 17)
> -#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
> -#define PIPE_CONTROL_NO_WRITE (0 << 14)
> -#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
> -#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
> -#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
> -#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
> -#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
> -#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
> -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */
> -#define PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE (1 << 9)
> -#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
> -#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */
> -/* GT */
> -#define PIPE_CONTROL_DATA_CACHE_FLUSH (1 << 5)
> -#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
> -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
> -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
> -#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
> -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
> -#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
> -#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
> +enum pipe_control_flags
> +{
> + PIPE_CONTROL_FLUSH_LLC = (1 << 1),
> + PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2),
> + PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3),
> + PIPE_CONTROL_CS_STALL = (1 << 4),
> + PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5),
> + PIPE_CONTROL_SYNC_GFDT = (1 << 6),
> + PIPE_CONTROL_TLB_INVALIDATE = (1 << 7),
> + PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8),
> + PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9),
> + PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10),
> + PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11),
> + PIPE_CONTROL_DEPTH_STALL = (1 << 12),
> + PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13),
> + PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14),
> + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15),
> + PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
> + PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17),
> + PIPE_CONTROL_FLUSH_ENABLE = (1 << 18),
> + PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19),
> + PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20),
> + PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21),
> + PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22),
> + PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23),
> + PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24),
> +};
>
> #define PIPE_CONTROL_CACHE_FLUSH_BITS \
> (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index f6acf81b899..b62890729fa 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -95,6 +95,37 @@ extern const struct brw_tracked_state gen7_urb;
> extern const struct brw_tracked_state gen8_pma_fix;
> extern const struct brw_tracked_state brw_cs_work_groups_surface;
>
> +void gen4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen10_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +void gen11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset,
> + uint64_t imm);
> +
> static inline bool
> brw_state_dirty(const struct brw_context *brw,
> GLuint mesa_flags, uint64_t brw_flags)
> diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c b/src/mesa/drivers/dri/i965/genX_pipe_control.c
> new file mode 100644
> index 00000000000..8eb37444253
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/genX_pipe_control.c
> @@ -0,0 +1,243 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "genX_boilerplate.h"
> +#include "brw_defines.h"
> +#include "brw_state.h"
> +
> +/**
> + * According to the latest documentation, any PIPE_CONTROL with the
> + * "Command Streamer Stall" bit set must also have another bit set,
> + * with five different options:
> + *
> + * - Render Target Cache Flush
> + * - Depth Cache Flush
> + * - Stall at Pixel Scoreboard
> + * - Post-Sync Operation
> + * - Depth Stall
> + * - DC Flush Enable
> + *
> + * I chose "Stall at Pixel Scoreboard" since we've used it effectively
> + * in the past, but the choice is fairly arbitrary.
> + */
> +static void
> +gen8_add_cs_stall_workaround_bits(uint32_t *flags)
> +{
> + uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
> + PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> + PIPE_CONTROL_WRITE_IMMEDIATE |
> + PIPE_CONTROL_WRITE_DEPTH_COUNT |
> + PIPE_CONTROL_WRITE_TIMESTAMP |
> + PIPE_CONTROL_STALL_AT_SCOREBOARD |
> + PIPE_CONTROL_DEPTH_STALL |
> + PIPE_CONTROL_DATA_CACHE_FLUSH;
> +
> + /* If we're doing a CS stall, and don't already have one of the
> + * workaround bits set, add "Stall at Pixel Scoreboard."
> + */
> + if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
> + *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
> +}
> +
> +/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
> + *
> + * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
> + * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
> + *
> + * Note that the kernel does CS stalls between batches, so we only need
> + * to count them within a batch.
> + */
> +static uint32_t
> +gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
> +{
> + if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
> + if (flags & PIPE_CONTROL_CS_STALL) {
> + /* If we're doing a CS stall, reset the counter and carry on. */
> + brw->pipe_controls_since_last_cs_stall = 0;
> + return 0;
> + }
> +
> + /* If this is the fourth pipe control without a CS stall, do one now. */
> + if (++brw->pipe_controls_since_last_cs_stall == 4) {
> + brw->pipe_controls_since_last_cs_stall = 0;
> + return PIPE_CONTROL_CS_STALL;
> + }
> + }
> + return 0;
> +}
> +
> +/* #1130 from gen10 workarounds page in h/w specs:
> + * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
> + * not enabled in same PIPE CONTROL and Enable Pixel score board stall if
> + * Render target cache flush is enabled."
> + *
> + * Applicable to CNL B0 and C0 steppings only.
> + */
> +static void
> +gen10_add_rcpfe_workaround_bits(uint32_t *flags)
> +{
> + if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
> + *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
> + } else if (*flags &
> + (PIPE_CONTROL_WRITE_IMMEDIATE |
> + PIPE_CONTROL_WRITE_DEPTH_COUNT |
> + PIPE_CONTROL_WRITE_TIMESTAMP)) {
> + *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
> + }
> +}
> +
> +static unsigned
> +flags_to_post_sync_op(uint32_t flags)
> +{
> + flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
> + PIPE_CONTROL_WRITE_DEPTH_COUNT |
> + PIPE_CONTROL_WRITE_TIMESTAMP;
> +
> + assert(util_bitcount(flags) <= 1);
> +
> + if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
> + return WriteImmediateData;
> +
> + if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
> + return WritePSDepthCount;
> +
> + if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
> + return WriteTimestamp;
> +
> + return 0;
> +}
> +
> +void
> +genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
> + struct brw_bo *bo, uint32_t offset, uint64_t imm)
> +{
> + if (GEN_GEN >= 8) {
> + if (GEN_GEN == 8)
> + gen8_add_cs_stall_workaround_bits(&flags);
> +
> + if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
> + if (GEN_GEN == 9) {
> + /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
> + * lists several workarounds:
> + *
> + * "Project: SKL, KBL, BXT
> + *
> + * If the VF Cache Invalidation Enable is set to a 1 in a
> + * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
> + * sets to 0, with the VF Cache Invalidation Enable set to 0
> + * needs to be sent prior to the PIPE_CONTROL with VF Cache
> + * Invalidation Enable set to a 1."
> + */
> + brw_emit_pipe_control_flush(brw, 0);
> + }
> +
> + if (GEN_GEN >= 9) {
> + /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
> + *
> + * "Project: BDW+
> + *
> + * When VF Cache Invalidate is set “Post Sync Operation” must
> + * be enabled to “Write Immediate Data” or “Write PS Depth
> + * Count” or “Write Timestamp”."
> + *
> + * If there's a BO, we're already doing some kind of write.
> + * If not, add a write to the workaround BO.
> + *
> + * XXX: This causes GPU hangs on Broadwell, so restrict it to
> + * Gen9+ for now...see this bug for more information:
> + * https://bugs.freedesktop.org/show_bug.cgi?id=103787
> + */
> + if (!bo) {
> + flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
> + bo = brw->workaround_bo;
> + }
> + }
> + }
> +
> + if (GEN_GEN == 10)
> + gen10_add_rcpfe_workaround_bits(&flags);
> + } else if (GEN_GEN >= 6) {
> + if (GEN_GEN == 6 &&
> + (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
> + /* Hardware workaround: SNB B-Spec says:
> + *
> + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
> + * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
> + * required.
> + */
> + brw_emit_post_sync_nonzero_flush(brw);
> + }
> +
> + flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
> + }
> +
> + brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
> + #if GEN_GEN >= 9
> + pc.FlushLLC = 0;
> + #endif
> + #if GEN_GEN >= 7
> + pc.LRIPostSyncOperation = NoLRIOperation;
> + pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
> + pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
> + #endif
> + #if GEN_GEN >= 6
> + pc.StoreDataIndex = 0;
> + pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
> + pc.GlobalSnapshotCountReset =
> + flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
> + pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
While comparing this against src/intel/genxml/gen*.xml I noticed that gen6
also has:
<field name="Synchronize GFDT Surface" start="49" end="49" type="bool"/>
Should we add it for consistency?
> + pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
> + pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
gen5 also has:
<field name="Stall At Pixel Scoreboard" start="33" end="33" type="bool"/>
We didn't have this before, and therefore this patch is fine. I just thought
I'd write it down since I came across it.
> + pc.RenderTargetCacheFlushEnable =
> + flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
> + pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> + pc.StateCacheInvalidationEnable =
> + flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
> + pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
> + pc.ConstantCacheInvalidationEnable =
> + flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> + #else
> + pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
> + #endif
> + pc.PostSyncOperation = flags_to_post_sync_op(flags);
> + pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
> + pc.InstructionCacheInvalidateEnable =
> + flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
> + pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
> + #if GEN_GEN >= 5 || GEN_IS_G4X
> + pc.IndirectStatePointersDisable =
> + flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
> + #endif
> + #if GEN_GEN >= 6
> + pc.TextureCacheInvalidationEnable =
> + flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
> + #elif GEN_GEN == 5 || GEN_IS_G4X
> + pc.TextureCacheFlushEnable =
> + flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
gen5 also has:
<field name="Depth Cache Flush Inhibit" start="32" end="32" type="uint">
> + #endif
> + pc.Address = ggtt_bo(bo, offset);
> + if (GEN_GEN < 7 && bo)
> + pc.DestinationAddressType = DAT_GGTT;
> + pc.ImmediateData = imm;
> + }
> +}
> diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build
> index 02f5f3073f7..e7c890785e2 100644
> --- a/src/mesa/drivers/dri/i965/meson.build
> +++ b/src/mesa/drivers/dri/i965/meson.build
> @@ -147,8 +147,8 @@ i965_gen_libs = []
> foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110']
> i965_gen_libs += static_library(
> 'i965_gen@0@'.format(v),
> - ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_state_upload.c',
> - gen_xml_pack],
> + ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c',
> + 'genX_state_upload.c', gen_xml_pack],
> include_directories : [inc_common, inc_intel, inc_dri_common],
> c_args : [
> c_vis_args, no_override_init_args, c_sse2_args,
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list