[Mesa-dev] [PATCH 17/17] winsys/amdgpu: always set AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE

Thu Apr 5 22:30:49 UTC 2018

I've run v2 series yesterday, before the 'Mega cleanup' landed through

glmark2, UH, UV, Blender, FreeCAD, Krita 4.0.0 under KDE Plasma5

on my Polaris 20 / RX580.

With UH and UV I've saw little corruption (flickering) of the 'fps 
counter' in the upper right corner and during 'Benchmark' the 
'Benchmark...' string was somewhat corrupted, too. The right side of 
this text string was flickering. Like there is a big wrongly overlayed 
triangle.

Other than that, series is

Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>

Dieter

Am 04.04.2018 03:59, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> There is a kernel patch that adds the new flag.
> ---
>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 36 
> ++++++++++++++++++++++---------
>  1 file changed, 26 insertions(+), 10 deletions(-)
> 
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index a3feeb93026..eb050b8fdb2 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -26,20 +26,24 @@
>   * of the Software.
>   */
> 
>  #include "amdgpu_cs.h"
>  #include "util/os_time.h"
>  #include <inttypes.h>
>  #include <stdio.h>
> 
>  #include "amd/common/sid.h"
> 
> +#ifndef AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE
> +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
> +#endif
> +
>  DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
> 
>  /* FENCES */
> 
>  static struct pipe_fence_handle *
>  amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
>                      unsigned ip_instance, unsigned ring)
>  {
>     struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
> 
> @@ -801,56 +805,68 @@ static void amdgpu_set_ib_size(struct amdgpu_ib 
> *ib)
>  }
> 
>  static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct 
> amdgpu_ib *ib)
>  {
>     amdgpu_set_ib_size(ib);
>     ib->used_ib_space += ib->base.current.cdw * 4;
>     ib->used_ib_space = align(ib->used_ib_space, 
> ws->info.ib_start_alignment);
>     ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw +
> ib->base.current.cdw);
>  }
> 
> -static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
> +static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
> +                                   struct amdgpu_cs_context *cs,
>                                     enum ring_type ring_type)
>  {
>     switch (ring_type) {
>     case RING_DMA:
>        cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA;
>        break;
> 
>     case RING_UVD:
>        cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
>        break;
> 
>     case RING_UVD_ENC:
>        cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
>        break;
> 
>     case RING_VCE:
>        cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
>        break;
> 
> -   case RING_COMPUTE:
> -      cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_COMPUTE;
> -      break;
> -
>     case RING_VCN_DEC:
>        cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC;
>        break;
> 
> -  case RING_VCN_ENC:
> +   case RING_VCN_ENC:
>        cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC;
>        break;
> 
> -   default:
> +   case RING_COMPUTE:
>     case RING_GFX:
> -      cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_GFX;
> +      cs->ib[IB_MAIN].ip_type = ring_type == RING_GFX ? 
> AMDGPU_HW_IP_GFX :
> +                                                        
> AMDGPU_HW_IP_COMPUTE;
> +
> +      /* The kernel shouldn't invalidate L2 and vL1. The proper place 
> for cache
> +       * invalidation is the beginning of IBs (the previous commit 
> does that),
> +       * because completion of an IB doesn't care about the state of
> GPU caches,
> +       * but the beginning of an IB does. Draw calls from multiple IBs 
> can be
> +       * executed in parallel, so draw calls from the current IB can
> finish after
> +       * the next IB starts drawing, and so the cache flush at the end 
> of IB
> +       * is always late.
> +       */
> +      if (ws->info.drm_minor >= 26)
> +         cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
>        break;
> +
> +   default:
> +      assert(0);
>     }
> 
>     memset(cs->buffer_indices_hashlist, -1,
> sizeof(cs->buffer_indices_hashlist));
>     cs->last_added_bo = NULL;
>     return true;
>  }
> 
>  static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
>  {
>     unsigned i;
> @@ -918,26 +934,26 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
>     cs->flush_data = flush_ctx;
>     cs->ring_type = ring_type;
> 
>     struct amdgpu_cs_fence_info fence_info;
>     fence_info.handle = cs->ctx->user_fence_bo;
>     fence_info.offset = cs->ring_type;
>     amdgpu_cs_chunk_fence_info_to_data(&fence_info, 
> (void*)&cs->fence_chunk);
> 
>     cs->main.ib_type = IB_MAIN;
> 
> -   if (!amdgpu_init_cs_context(&cs->csc1, ring_type)) {
> +   if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) {
>        FREE(cs);
>        return NULL;
>     }
> 
> -   if (!amdgpu_init_cs_context(&cs->csc2, ring_type)) {
> +   if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ring_type)) {
>        amdgpu_destroy_cs_context(&cs->csc1);
>        FREE(cs);
>        return NULL;
>     }
> 
>     /* Set the first submission context as current. */
>     cs->csc = &cs->csc1;
>     cs->cst = &cs->csc2;
> 
>     if (!amdgpu_get_new_ib(&ctx->ws->base, cs, IB_MAIN)) {