[Mesa-dev] [PATCH 17/17] winsys/amdgpu: always set AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE
Samuel Pitoiset
samuel.pitoiset at gmail.com
Thu Apr 5 08:22:54 UTC 2018
Patches 16-17 are:
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Those two are quite interesting. I will probably update my kernel and
experiment with them.
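
As a quick sanity check before experimenting, something like the sketch
below (my own rough example, not part of the patch; the render node path
is just an assumption) can report whether the running kernel's amdgpu DRM
interface is new enough for the flag (minor >= 26, as the patch tests):

/* Rough sketch: query the amdgpu DRM interface version via libdrm.
 * Link with -ldrm_amdgpu; the render node path may differ per system. */
#include <amdgpu.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
   uint32_t major, minor;
   amdgpu_device_handle dev;
   int fd = open("/dev/dri/renderD128", O_RDWR);

   if (fd < 0)
      return 1;
   if (amdgpu_device_initialize(fd, &major, &minor, &dev) == 0) {
      printf("amdgpu DRM %u.%u: TC_WB_NOT_INVALIDATE %ssupported\n",
             major, minor, minor >= 26 ? "" : "not ");
      amdgpu_device_deinitialize(dev);
   }
   close(fd);
   return 0;
}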
On 04/04/2018 03:59 AM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> There is a kernel patch that adds the new flag.
> ---
> src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 36 ++++++++++++++++++++++---------
> 1 file changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index a3feeb93026..eb050b8fdb2 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -26,20 +26,24 @@
> * of the Software.
> */
>
> #include "amdgpu_cs.h"
> #include "util/os_time.h"
> #include <inttypes.h>
> #include <stdio.h>
>
> #include "amd/common/sid.h"
>
> +#ifndef AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE
> +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
> +#endif
> +
> DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
>
> /* FENCES */
>
> static struct pipe_fence_handle *
> amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
> unsigned ip_instance, unsigned ring)
> {
> struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
>
> @@ -801,56 +805,68 @@ static void amdgpu_set_ib_size(struct amdgpu_ib *ib)
> }
>
> static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
> {
> amdgpu_set_ib_size(ib);
> ib->used_ib_space += ib->base.current.cdw * 4;
> ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_start_alignment);
> ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw + ib->base.current.cdw);
> }
>
> -static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
> +static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
> + struct amdgpu_cs_context *cs,
> enum ring_type ring_type)
> {
> switch (ring_type) {
> case RING_DMA:
> cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA;
> break;
>
> case RING_UVD:
> cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
> break;
>
> case RING_UVD_ENC:
> cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
> break;
>
> case RING_VCE:
> cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
> break;
>
> - case RING_COMPUTE:
> - cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_COMPUTE;
> - break;
> -
> case RING_VCN_DEC:
> cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC;
> break;
>
> - case RING_VCN_ENC:
> + case RING_VCN_ENC:
> cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC;
> break;
>
> - default:
> + case RING_COMPUTE:
> case RING_GFX:
> - cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_GFX;
> + cs->ib[IB_MAIN].ip_type = ring_type == RING_GFX ? AMDGPU_HW_IP_GFX :
> + AMDGPU_HW_IP_COMPUTE;
> +
> + /* The kernel shouldn't invalidate L2 and vL1. The proper place for cache
> + * invalidation is the beginning of IBs (the previous commit does that),
> + * because completion of an IB doesn't care about the state of GPU caches,
> + * but the beginning of an IB does. Draw calls from multiple IBs can be
> + * executed in parallel, so draw calls from the current IB can finish after
> + * the next IB starts drawing, and so the cache flush at the end of IB
> + * is always late.
> + */
> + if (ws->info.drm_minor >= 26)
> + cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
> break;
> +
> + default:
> + assert(0);
> }
>
> memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));
> cs->last_added_bo = NULL;
> return true;
> }
>
> static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
> {
> unsigned i;
> @@ -918,26 +934,26 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
> cs->flush_data = flush_ctx;
> cs->ring_type = ring_type;
>
> struct amdgpu_cs_fence_info fence_info;
> fence_info.handle = cs->ctx->user_fence_bo;
> fence_info.offset = cs->ring_type;
> amdgpu_cs_chunk_fence_info_to_data(&fence_info, (void*)&cs->fence_chunk);
>
> cs->main.ib_type = IB_MAIN;
>
> - if (!amdgpu_init_cs_context(&cs->csc1, ring_type)) {
> + if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) {
> FREE(cs);
> return NULL;
> }
>
> - if (!amdgpu_init_cs_context(&cs->csc2, ring_type)) {
> + if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ring_type)) {
> amdgpu_destroy_cs_context(&cs->csc1);
> FREE(cs);
> return NULL;
> }
>
> /* Set the first submission context as current. */
> cs->csc = &cs->csc1;
> cs->cst = &cs->csc2;
>
> if (!amdgpu_get_new_ib(&ctx->ws->base, cs, IB_MAIN)) {
>
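
For reference, a stripped-down illustration (my own sketch, not code from
the patch; fill_gfx_ib_chunk is a hypothetical helper) of how the flag
ends up in the IB chunk handed to the kernel CS ioctl:

#include <stdbool.h>
#include <amdgpu_drm.h> /* kernel UAPI: drm_amdgpu_cs_chunk_ib, AMDGPU_HW_IP_*, AMDGPU_IB_FLAG_* */

#ifndef AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE
#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
#endif

/* Hypothetical helper: describe one GFX IB for submission. */
static void fill_gfx_ib_chunk(struct drm_amdgpu_cs_chunk_ib *ib,
                              uint64_t va, uint32_t size_in_bytes,
                              unsigned drm_minor)
{
   ib->ip_type = AMDGPU_HW_IP_GFX;
   ib->va_start = va;
   ib->ib_bytes = size_in_bytes;
   /* Ask the kernel to only write back TC (L2) at IB completion instead of
    * also invalidating it; invalidation is done at the start of the next IB. */
   ib->flags = drm_minor >= 26 ? AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE : 0;
}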