[Mesa-dev] [PATCH 4/4] i965: Reimplement all the PIPE_CONTROL rules.

Pohjolainen, Topi topi.pohjolainen at gmail.com
Mon Feb 25 14:33:11 UTC 2019


On Thu, Nov 01, 2018 at 08:04:21PM -0700, Kenneth Graunke wrote:
> This implements virtually all documented PIPE_CONTROL restrictions
> in a centralized helper.  You now simply ask for the operations you
> want, and the pipe control "brain" will figure out exactly what pipe
> controls to emit to make that happen without tanking your system.
> 
> The hope is that this will fix some intermittent flushing issues as
> well as GPU hangs.  However, it also has a high risk of causing GPU
> hangs and other regressions, as this is a particularly sensitive
> area and poking the bear isn't always advisable.

First, I checked that I could find all of the quoted restrictions in the
bspec. There was one that I couldn't find, noted further down.

Second, I checked that all the rules implemented by the earlier code are
still implemented. I found one exception, noted further down as well.

I didn't check whether the bspec contains further rules that are still
missing here. That would be another exercise...

> ---
>  src/mesa/drivers/dri/i965/genX_pipe_control.c | 563 +++++++++++++-----
>  1 file changed, 428 insertions(+), 135 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c b/src/mesa/drivers/dri/i965/genX_pipe_control.c
> index 8eb37444253..503e674661b 100644
> --- a/src/mesa/drivers/dri/i965/genX_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/genX_pipe_control.c
> @@ -25,172 +25,465 @@
>  #include "brw_defines.h"
>  #include "brw_state.h"
>  
> +static unsigned
> +flags_to_post_sync_op(uint32_t flags)
> +{
> +   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
> +      return WriteImmediateData;
> +
> +   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
> +      return WritePSDepthCount;
> +
> +   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
> +      return WriteTimestamp;
> +
> +   return 0;
> +}
> +
>  /**
> - * According to the latest documentation, any PIPE_CONTROL with the
> - * "Command Streamer Stall" bit set must also have another bit set,
> - * with five different options:
> - *
> - *  - Render Target Cache Flush
> - *  - Depth Cache Flush
> - *  - Stall at Pixel Scoreboard
> - *  - Post-Sync Operation
> - *  - Depth Stall
> - *  - DC Flush Enable
> - *
> - * I chose "Stall at Pixel Scoreboard" since we've used it effectively
> - * in the past, but the choice is fairly arbitrary.
> + * Do the given flags have a Post Sync or LRI Post Sync operation?
>   */
> -static void
> -gen8_add_cs_stall_workaround_bits(uint32_t *flags)
> +static enum pipe_control_flags
> +get_post_sync_flags(enum pipe_control_flags flags)
>  {
> -   uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
> -                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> -                      PIPE_CONTROL_WRITE_IMMEDIATE |
> -                      PIPE_CONTROL_WRITE_DEPTH_COUNT |
> -                      PIPE_CONTROL_WRITE_TIMESTAMP |
> -                      PIPE_CONTROL_STALL_AT_SCOREBOARD |
> -                      PIPE_CONTROL_DEPTH_STALL |
> -                      PIPE_CONTROL_DATA_CACHE_FLUSH;
> -
> -   /* If we're doing a CS stall, and don't already have one of the
> -    * workaround bits set, add "Stall at Pixel Scoreboard."
> +   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
> +            PIPE_CONTROL_WRITE_DEPTH_COUNT |
> +            PIPE_CONTROL_WRITE_TIMESTAMP |
> +            PIPE_CONTROL_LRI_POST_SYNC_OP;
> +
> +   /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
> +    * "LRI Post Sync Operation".  So more than one bit set would be illegal.
>      */
> -   if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
> -      *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
> +   assert(util_bitcount(flags) <= 1);
> +
> +   return flags;
>  }
>  
> -/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
> +#define IS_COMPUTE_PIPELINE(brw) \
> +   (GEN_GEN >= 7 && brw->last_pipeline == BRW_COMPUTE_PIPELINE)
> +
> +/* Closed interval - GEN_GEN \in [x, y] */
> +#define IS_GEN_BETWEEN(x, y) (GEN_GEN >= x && GEN_GEN <= y)
> +#define IS_GENx10_BETWEEN(x, y) \
> +   (GEN_VERSIONx10 >= x && GEN_VERSIONx10 <= y)
> +
> +/**
> + * Emit a series of PIPE_CONTROL commands, taking into account any
> + * workarounds necessary to actually accomplish the caller's request.
> + *
> + * Unless otherwise noted, spec quotations in this function come from:
>   *
> - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
> - *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
> + * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
> + * Restrictions for PIPE_CONTROL.
>   *
> - * Note that the kernel does CS stalls between batches, so we only need
> - * to count them within a batch.
> + * You should not use this function directly.  Use the helpers in
> + * brw_pipe_control.c instead, which may split the pipe control further.
>   */
> -static uint32_t
> -gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
> +void
> +genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
> +                            struct brw_bo *bo, uint32_t offset, uint64_t imm)
>  {
> -   if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
> -      if (flags & PIPE_CONTROL_CS_STALL) {
> -         /* If we're doing a CS stall, reset the counter and carry on. */
> -         brw->pipe_controls_since_last_cs_stall = 0;
> -         return 0;
> +   UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
> +   enum pipe_control_flags non_lri_post_sync_flags =
> +      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
> +
> +   /* Recursive PIPE_CONTROL workarounds --------------------------------
> +    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
> +    *
> +    * We do these first because we want to look at the original operation,
> +    * rather than any workarounds we set.
> +    */
> +   if (GEN_GEN == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
> +      /* Hardware workaround: SNB B-Spec says:
> +       *
> +       *    "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
> +       *     Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
> +       *     required."
> +       */
> +      brw_emit_post_sync_nonzero_flush(brw);
> +   }
> +
> +   if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
> +      /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
> +       * lists several workarounds:
> +       *
> +       *    "Project: SKL, KBL, BXT
> +       *
> +       *     If the VF Cache Invalidation Enable is set to a 1 in a
> +       *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
> +       *     sets to 0, with the VF Cache Invalidation Enable set to 0
> +       *     needs to be sent prior to the PIPE_CONTROL with VF Cache
> +       *     Invalidation Enable set to a 1."
> +       */
> +      genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
> +   }
> +
> +   if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
> +      /* Project: SKL / Argument: LRI Post Sync Operation [23]
> +       *
> +       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
> +       *  programmed prior to programming a PIPECONTROL command with "LRI
> +       *  Post Sync Operation" in GPGPU mode of operation (i.e when
> +       *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
> +       *
> +       * The same text exists a few rows below for Post Sync Op.
> +       */
> +      genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, bo, offset, imm);

Are bo, offset, imm needed here as well?

> +   }
> +
> +   if (GEN_GEN == 10 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
> +      /* Cannonlake:
> +       * "Before sending a PIPE_CONTROL command with bit 12 set, SW must issue
> +       *  another PIPE_CONTROL with Render Target Cache Flush Enable (bit 12)
> +       *  = 0 and Pipe Control Flush Enable (bit 7) = 1"
> +       */
> +      genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_FLUSH_ENABLE,
> +                                  bo, offset, imm);
> +   }
> +
> +   /* "Flush Types" workarounds ---------------------------------------------
> +    * We do these now because they may add post-sync operations or CS stalls.
> +    */
> +
> +   if (IS_GEN_BETWEEN(8, 10) && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
> +      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
> +       *
> +       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
> +       *  'Write PS Depth Count' or 'Write Timestamp'."
> +       */
> +      if (!bo) {
> +         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
> +         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
> +         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
> +         bo = brw->workaround_bo;
>        }
> +   }
>  
> -      /* If this is the fourth pipe control without a CS stall, do one now. */
> -      if (++brw->pipe_controls_since_last_cs_stall == 4) {
> -         brw->pipe_controls_since_last_cs_stall = 0;
> -         return PIPE_CONTROL_CS_STALL;
> +   /* #1130 from Gen10 workarounds page:
> +    *
> +    *    "Enable Depth Stall on every Post Sync Op if Render target Cache
> +    *     Flush is not enabled in same PIPE CONTROL and Enable Pixel score
> +    *     board stall if Render target cache flush is enabled."
> +    *
> +    * Applicable to CNL B0 and C0 steppings only.
> +    *
> +    * The wording here is unclear, and this workaround doesn't look anything
> +    * like the internal bug report recommendations, but leave it be for now...
> +    */
> +   if (GEN_GEN == 10) {
> +      if (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
> +         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
> +      } else if (flags & non_lri_post_sync_flags) {
> +         flags |= PIPE_CONTROL_DEPTH_STALL;
>        }
>     }
> -   return 0;
> -}
>  
> -/* #1130 from gen10 workarounds page in h/w specs:
> - * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
> - *  not enabled in same PIPE CONTROL and Enable Pixel score board stall if
> - *  Render target cache flush is enabled."
> - *
> - * Applicable to CNL B0 and C0 steppings only.
> - */
> -static void
> -gen10_add_rcpfe_workaround_bits(uint32_t *flags)
> -{
> -   if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
> -      *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
> -   } else if (*flags &
> -             (PIPE_CONTROL_WRITE_IMMEDIATE |
> -              PIPE_CONTROL_WRITE_DEPTH_COUNT |
> -              PIPE_CONTROL_WRITE_TIMESTAMP)) {
> -      *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
> +   if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
> +      /* Project: PRE-HSW / Argument: Depth Stall
> +       *
> +       * "The following bits must be clear:
> +       *  - Render Target Cache Flush Enable ([12] of DW1)
> +       *  - Depth Cache Flush Enable ([0] of DW1)"
> +       */
> +      assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
> +                        PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
>     }
> -}
>  
> -static unsigned
> -flags_to_post_sync_op(uint32_t flags)
> -{
> -   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
> -            PIPE_CONTROL_WRITE_DEPTH_COUNT |
> -            PIPE_CONTROL_WRITE_TIMESTAMP;
> +   if (GEN_GEN >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
> +      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
> +       *
> +       *    "This bit must be DISABLED for operations other than writing
> +       *     PS_DEPTH_COUNT."
> +       *
> +       * This seems like nonsense.  An Ivybridge workaround requires us to
> +       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
> +       * operation.  Gen8+ requires us to emit depth stalls and depth cache
> +       * flushes together.  So, it's hard to imagine this means anything other
> +       * than "we originally intended this to be used for PS_DEPTH_COUNT".
> +       *
> +       * We ignore the supposed restriction and do nothing.
> +       */
> +   }
>  
> -   assert(util_bitcount(flags) <= 1);
> +   if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
> +      /* Project: PRE-HSW / Argument: Depth Cache Flush
> +       *
> +       * "Depth Stall must be clear ([13] of DW1)."
> +       */
> +      assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
> +   }
>  
> -   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
> -      return WriteImmediateData;
> +   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
> +                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
> +      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
> +       *
> +       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
> +       *     PS_DEPTH_COUNT or TIMESTAMP queries."
> +       *
> +       * TODO: Implement end-of-pipe checking.
> +       */
> +      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
> +                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
> +   }
>  
> -   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
> -      return WritePSDepthCount;
> +   if (GEN_GEN < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
> +      /* From the PIPE_CONTROL instruction table, bit 1:
> +       *
> +       *    "This bit is ignored if Depth Stall Enable is set.
> +       *     Further, the render cache is not flushed even if Write Cache
> +       *     Flush Enable bit is set."
> +       *
> +       * We assert that the caller doesn't do this combination, to try and
> +       * prevent mistakes.  It shouldn't hurt the GPU, though.
> +       *
> +       * We skip this check on Gen11+ as the "Stall and Pixel Scoreboard"
> +       * and "Render Target Flush" combo is explicitly required for BTI
> +       * update workarounds.
> +       */
> +      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
> +                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
> +   }
>  
> -   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
> -      return WriteTimestamp;
> +   /* PIPE_CONTROL page workarounds ------------------------------------- */
>  
> -   return 0;
> -}
> +   if (IS_GEN_BETWEEN(7, 8) && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
> +      /* From the PIPE_CONTROL page itself:
> +       *
> +       *    "IVB, HSW, BDW
> +       *     Restriction: Pipe_control with CS-stall bit set must be issued
> +       *     before a pipe-control command that has the State Cache
> +       *     Invalidate bit set."
> +       */
> +      flags |= PIPE_CONTROL_CS_STALL;
> +   }
>  
> -void
> -genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
> -                            struct brw_bo *bo, uint32_t offset, uint64_t imm)
> -{
> -   if (GEN_GEN >= 8) {
> -      if (GEN_GEN == 8)
> -         gen8_add_cs_stall_workaround_bits(&flags);
> -
> -      if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
> -         if (GEN_GEN == 9) {
> -            /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
> -             * lists several workarounds:
> -             *
> -             *    "Project: SKL, KBL, BXT
> -             *
> -             *     If the VF Cache Invalidation Enable is set to a 1 in a
> -             *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
> -             *     sets to 0, with the VF Cache Invalidation Enable set to 0
> -             *     needs to be sent prior to the PIPE_CONTROL with VF Cache
> -             *     Invalidation Enable set to a 1."
> -             */
> -            brw_emit_pipe_control_flush(brw, 0);
> -         }
> -
> -         if (GEN_GEN >= 9) {
> -            /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
> -             *
> -             *    "Project: BDW+
> -             *
> -             *     When VF Cache Invalidate is set “Post Sync Operation” must
> -             *     be enabled to “Write Immediate Data” or “Write PS Depth
> -             *     Count” or “Write Timestamp”."
> -             *
> -             * If there's a BO, we're already doing some kind of write.
> -             * If not, add a write to the workaround BO.
> -             *
> -             * XXX: This causes GPU hangs on Broadwell, so restrict it to
> -             *      Gen9+ for now...see this bug for more information:
> -             *      https://bugs.freedesktop.org/show_bug.cgi?id=103787

In "Flush Types" workarounds later on you apply this for gen8 as well.

> -             */
> -            if (!bo) {
> -               flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
> -               bo = brw->workaround_bo;
> -            }
> -         }
> +   if (GEN_IS_HASWELL) {
> +      /* From the PIPE_CONTROL page itself:
> +       *
> +       *    "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
> +       *     Prior to programming a PIPECONTROL command with any of the RO
> +       *     cache invalidation bit set, program a PIPECONTROL flush command
> +       *     with “CS stall” bit and “HDC Flush” bit set."
> +       *
> +       * TODO: Actually implement this.  What's an HDC Flush?

There is a bit 9 "HDC Flush", but that is defined for ICL; for HSW I couldn't
find anything either.

> +       */
> +   }
> +
> +   if (flags & PIPE_CONTROL_FLUSH_LLC) {
> +      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
> +       *
> +       *    "Project: ALL
> +       *     SW must always program Post-Sync Operation to "Write Immediate
> +       *     Data" when Flush LLC is set."
> +       *
> +       * For now, we just require the caller to do it.
> +       */
> +      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
> +   }
> +
> +   /* "Post-Sync Operation" workarounds -------------------------------- */
> +
> +   /* Project: All / Argument: Global Snapshot Count Reset [19]
> +    *
> +    * "This bit must not be exercised on any product.
> +    *  Requires stall bit ([20] of DW1) set."
> +    *
> +    * We don't use this, so we just assert that it isn't used.  The
> +    * PIPE_CONTROL instruction page indicates that they intended this
> +    * as a debug feature and don't think it is useful in production,
> +    * but it may actually be usable, should we ever want to.
> +    */
> +   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
> +
> +   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
> +                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
> +      /* Project: All / Arguments:
> +       *
> +       * - Generic Media State Clear [16]
> +       * - Indirect State Pointers Disable [16]
> +       *
> +       *    "Requires stall bit ([20] of DW1) set."
> +       *
> +       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
> +       * State Clear) says:
> +       *
> +       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
> +       *     programmed prior to programming a PIPECONTROL command with "Media
> +       *     State Clear" set in GPGPU mode of operation"
> +       *
> +       * This is a subset of the earlier rule, so there's nothing to do.
> +       */
> +      flags |= PIPE_CONTROL_CS_STALL;
> +   }
> +
> +   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
> +      /* Project: All / Argument: Store Data Index
> +       *
> +       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
> +       *  than '0'."
> +       *
> +       * For now, we just assert that the caller does this.  We might want to
> +       * automatically add a write to the workaround BO...
> +       */
> +      assert(non_lri_post_sync_flags != 0);
> +   }
> +
> +   if (flags & PIPE_CONTROL_SYNC_GFDT) {
> +      /* Project: All / Argument: Sync GFDT
> +       *
> +       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
> +       *  than '0' or 0x2520[13] must be set."
> +       *
> +       * For now, we just assert that the caller does this.
> +       */
> +      assert(non_lri_post_sync_flags != 0);
> +   }
> +
> +   if (IS_GENx10_BETWEEN(60, 75) && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
> +      /* Project: SNB, IVB, HSW / Argument: TLB inv
> +       *
> +       * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
> +       *  must be set to something other than '0'."
> +       *
> +       * For now, we just assert that the caller does this.
> +       */
> +      assert(non_lri_post_sync_flags != 0);
> +   }
> +
> +   if (GEN_GEN >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
> +      /* Project: IVB+ / Argument: TLB inv
> +       *
> +       *    "Requires stall bit ([20] of DW1) set."
> +       *
> +       * Also, from the PIPE_CONTROL instruction table:
> +       *
> +       *    "Project: SKL+
> +       *     Post Sync Operation or CS stall must be set to ensure a TLB
> +       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
> +       *     cache to invalidate."
> +       *
> +       * This is not a subset of the earlier rule, so there's nothing to do.
> +       */
> +      flags |= PIPE_CONTROL_CS_STALL;
> +   }
> +
> +   if (GEN_GEN == 9 && devinfo->gt == 4) {
> +      /* TODO: The big Skylake GT4 post sync op workaround */
> +   }
> +
> +   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
> +
> +   if (IS_COMPUTE_PIPELINE(brw)) {
> +      if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
> +         /* Project: SKL+ / Argument: Tex Invalidate
> +          * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
> +          */
> +         flags |= PIPE_CONTROL_CS_STALL;
>        }
>  
> -      if (GEN_GEN == 10)
> -         gen10_add_rcpfe_workaround_bits(&flags);
> -   } else if (GEN_GEN >= 6) {
> -      if (GEN_GEN == 6 &&
> -          (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
> -         /* Hardware workaround: SNB B-Spec says:
> +      if (GEN_GEN == 8 && (post_sync_flags ||
> +                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
> +                                     PIPE_CONTROL_DEPTH_STALL |
> +                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
> +                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
> +         /* Project: BDW / Arguments:
>            *
> -          *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
> -          *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
> -          *   required.
> +          * - LRI Post Sync Operation   [23]
> +          * - Post Sync Op              [15:14]
> +          * - Notify En                 [8]
> +          * - Depth Stall               [13]
> +          * - Render Target Cache Flush [12]
> +          * - Depth Cache Flush         [0]
> +          * - DC Flush Enable           [5]
> +          *
> +          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
> +          *     Workloads."

This I couldn't find.

>            */
> -         brw_emit_post_sync_nonzero_flush(brw);
> +         flags |= PIPE_CONTROL_CS_STALL;
> +
> +         /* Also, from the PIPE_CONTROL instruction table, bit 20:
> +          *
> +          *    "Project: BDW
> +          *     This bit must be always set when PIPE_CONTROL command is
> +          *     programmed by GPGPU and MEDIA workloads, except for the cases
> +          *     when only Read Only Cache Invalidation bits are set (State
> +          *     Cache Invalidation Enable, Instruction cache Invalidation
> +          *     Enable, Texture Cache Invalidation Enable, Constant Cache
> +          *     Invalidation Enable). This is to WA FFDOP CG issue, this WA
> +          *     need not implemented when FF_DOP_CG is disable via "Fixed
> +          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
> +          *
> +          * It sounds like we could avoid CS stalls in some cases, but we
> +          * don't currently bother.  This list isn't exactly the list above,
> +          * either...
> +          */
> +      }
> +   }
> +
> +   /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
> +    *
> +    * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
> +    *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
> +    *
> +    * Note that the kernel does CS stalls between batches, so we only need
> +    * to count them within a batch.  We currently naively count every 4, and
> +    * don't skip the ones with only read-cache-invalidate bits set.  This
> +    * may or may not be a problem...
> +    */
> +   if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
> +      if (flags & PIPE_CONTROL_CS_STALL) {
> +         /* If we're doing a CS stall, reset the counter and carry on. */
> +         brw->pipe_controls_since_last_cs_stall = 0;
>        }
>  
> -      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
> +      /* If this is the fourth pipe control without a CS stall, do one now. */
> +      if (++brw->pipe_controls_since_last_cs_stall == 4) {
> +         brw->pipe_controls_since_last_cs_stall = 0;
> +         flags |= PIPE_CONTROL_CS_STALL;
> +      }
>     }
>  
> +   /* "Stall" workarounds ----------------------------------------------
> +    * These have to come after the earlier ones because we may have added
> +    * some additional CS stalls above.
> +    */
> +
> +   if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
> +      /* Project: PRE-SKL, VLV, CHV
> +       *
> +       * "[All Stepping][All SKUs]:
> +       *
> +       *  One of the following must also be set:
> +       *
> +       *  - Render Target Cache Flush Enable ([12] of DW1)
> +       *  - Depth Cache Flush Enable ([0] of DW1)
> +       *  - Stall at Pixel Scoreboard ([1] of DW1)
> +       *  - Depth Stall ([13] of DW1)
> +       *  - Post-Sync Operation ([13] of DW1)
> +       *  - DC Flush Enable ([5] of DW1)"
> +       *
> +       * If we don't already have one of those bits set, we choose to add
> +       * "Stall at Pixel Scoreboard".  Some of the other bits require a
> +       * CS stall as a workaround (see above), which would send us into
> +       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
> +       * appears to be safe, so we choose that.
> +       */
> +      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
> +                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +                               PIPE_CONTROL_WRITE_IMMEDIATE |
> +                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
> +                               PIPE_CONTROL_WRITE_TIMESTAMP |
> +                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
> +                               PIPE_CONTROL_DEPTH_STALL |
> +                               PIPE_CONTROL_DATA_CACHE_FLUSH;
> +      if (!(flags & wa_bits))
> +         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
> +   }
> +
> +   /* Emit --------------------------------------------------------------- */
> +
>     brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
>     #if GEN_GEN >= 9
>        pc.FlushLLC = 0;
> -- 
> 2.19.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list