[Mesa-dev] [PATCH 1/3] r600g: fix CP DMA hazard with index buffer fetches

Alex Deucher alexdeucher at gmail.com
Fri May 27 20:09:48 UTC 2016


On Fri, May 27, 2016 at 3:56 PM, Marek Olšák <maraeo at gmail.com> wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> v2: don't use PFP_SYNC_ME on R700

All 6 patches look reasonable to me.
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  src/gallium/drivers/r600/evergreen_hw_context.c | 13 +++++++++++--
>  src/gallium/drivers/r600/evergreend.h           |  1 +
>  src/gallium/drivers/r600/r600_blit.c            |  6 ------
>  src/gallium/drivers/r600/r600_hw_context.c      | 25 ++++++++++++++++++++-----
>  src/gallium/drivers/r600/r600d.h                |  1 +
>  src/gallium/drivers/radeonsi/sid.h              |  2 +-
>  6 files changed, 34 insertions(+), 14 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
> index f456696..14877ae 100644
> --- a/src/gallium/drivers/r600/evergreen_hw_context.c
> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c
> @@ -117,7 +117,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
>                 unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
>                 unsigned reloc;
>
> -               r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
> +               r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
>
>                 /* Flush the caches for the first copy only. */
>                 if (rctx->b.flags) {
> @@ -148,9 +148,18 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
>                 offset += byte_count;
>         }
>
> +       /* CP DMA is executed in ME, but index buffers are read by PFP.
> +        * This ensures that ME (CP DMA) is idle before PFP starts fetching
> +        * indices. If we wanted to execute CP DMA in PFP, this packet
> +        * should precede it.
> +        */
> +       if (coher == R600_COHERENCY_SHADER) {
> +               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> +               radeon_emit(cs, 0);
> +       }
> +
>         /* Invalidate the read caches. */
>         rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
>                          R600_CONTEXT_INV_VERTEX_CACHE |
>                          R600_CONTEXT_INV_TEX_CACHE;
>  }
> -
> diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
> index c1c6169..a81b6c5 100644
> --- a/src/gallium/drivers/r600/evergreend.h
> +++ b/src/gallium/drivers/r600/evergreend.h
> @@ -88,6 +88,7 @@
>  #define                WAIT_REG_MEM_EQUAL              3
>  #define PKT3_MEM_WRITE                         0x3D
>  #define PKT3_INDIRECT_BUFFER                   0x32
> +#define PKT3_PFP_SYNC_ME                      0x42
>  #define PKT3_SURFACE_SYNC                      0x43
>  #define PKT3_ME_INITIALIZE                     0x44
>  #define PKT3_COND_WRITE                        0x45
> diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
> index 9230b40..9f309d8 100644
> --- a/src/gallium/drivers/r600/r600_blit.c
> +++ b/src/gallium/drivers/r600/r600_blit.c
> @@ -519,12 +519,6 @@ static void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst
>         } else {
>                 util_resource_copy_region(ctx, dst, 0, dstx, 0, 0, src, 0, src_box);
>         }
> -
> -       /* The index buffer (VGT) doesn't seem to see the result of the copying.
> -        * Can we somehow flush the index buffer cache? Starting a new IB seems
> -        * to do the trick. */
> -       if (rctx->b.chip_class <= R700)
> -               rctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
>  }
>
>  /**
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index 1f7bed8..5f63dde 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -403,7 +403,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
>                 unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
>                 unsigned src_reloc, dst_reloc;
>
> -               r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
> +               r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
>
>                 /* Flush the caches for the first copy only. */
>                 if (rctx->b.flags) {
> @@ -438,10 +438,25 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
>                 dst_offset += byte_count;
>         }
>
> -       /* Invalidate the read caches. */
> -       rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
> -                        R600_CONTEXT_INV_VERTEX_CACHE |
> -                        R600_CONTEXT_INV_TEX_CACHE;
> +       /* CP DMA is executed in ME, but index buffers are read by PFP.
> +        * This ensures that ME (CP DMA) is idle before PFP starts fetching
> +        * indices. If we wanted to execute CP DMA in PFP, this packet
> +        * should precede it.
> +        *
> +        * R6xx-R7xx is out of luck, as it doesn't have the packet.
> +        * Starting a new IB has the same effect.
> +        */
> +       if (rctx->b.chip_class >= EVERGREEN) {
> +               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> +               radeon_emit(cs, 0);
> +
> +               /* Invalidate the read caches. */
> +               rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
> +                                R600_CONTEXT_INV_VERTEX_CACHE |
> +                                R600_CONTEXT_INV_TEX_CACHE;
> +       } else {
> +               rctx->b.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
> +       }
>  }
>
>  void r600_dma_copy_buffer(struct r600_context *rctx,
> diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
> index 24f599e..5f79222 100644
> --- a/src/gallium/drivers/r600/r600d.h
> +++ b/src/gallium/drivers/r600/r600d.h
> @@ -98,6 +98,7 @@
>  #define                WAIT_REG_MEM_EQUAL              3
>  #define PKT3_MEM_WRITE                         0x3D
>  #define PKT3_INDIRECT_BUFFER                   0x32
> +#define PKT3_PFP_SYNC_ME                      0x42 /* EG+ */
>  #define PKT3_SURFACE_SYNC                      0x43
>  #define PKT3_ME_INITIALIZE                     0x44
>  #define PKT3_COND_WRITE                        0x45
> diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
> index 25f8cf5..ddbfe00 100644
> --- a/src/gallium/drivers/radeonsi/sid.h
> +++ b/src/gallium/drivers/radeonsi/sid.h
> @@ -135,7 +135,7 @@
>  #define                COPY_DATA_DST_SEL(x)            (((unsigned)(x) & 0xf) << 8)
>  #define                COPY_DATA_COUNT_SEL             (1 << 16)
>  #define                COPY_DATA_WR_CONFIRM            (1 << 20)
> -#define PKT3_PFP_SYNC_ME                      0x42 /* r7xx+ */
> +#define PKT3_PFP_SYNC_ME                      0x42
>  #define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
>  #define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
>  #define PKT3_COND_WRITE                        0x45
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list