[Mesa-dev] [PATCH 02/16] winsys/amdgpu: disable CPU caching for GFX & SDMA IBs
Marek Olšák
maraeo at gmail.com
Fri Oct 13 12:03:58 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
Use write-combined (CPU-uncached) GTT memory for GFX, compute and SDMA IBs.
The GPU can then fetch the IB without snooping CPU caches, which should
decrease IB fetch latency. IBs are written by the CPU mostly sequentially,
so write-combining keeps those writes fast.
---
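For clarity, here is a standalone sketch of the flag selection this patch
adds, not part of the diff itself (the helper name amdgpu_ib_buffer_flags is
hypothetical; the ring and flag enums are the ones used in the hunk below):

static unsigned amdgpu_ib_buffer_flags(enum ring_type ring_type)
{
   unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;

   /* IBs are written once by the CPU and then fetched by the GPU.
    * Write-combined (CPU-uncached) GTT keeps sequential CPU writes
    * fast while letting the GPU read without snooping CPU caches. */
   if (ring_type == RING_GFX ||
       ring_type == RING_COMPUTE ||
       ring_type == RING_DMA)
      flags |= RADEON_FLAG_GTT_WC;

   return flags;
}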
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0a657f7..8fbe8ae 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -609,21 +609,22 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
buffer->u.real.priority_usage |= 1ull << priority;
buffer->usage |= usage;
cs->last_added_bo = bo;
cs->last_added_bo_index = index;
cs->last_added_bo_usage = buffer->usage;
cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
return index;
}
-static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
+static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib,
+ enum ring_type ring_type)
{
struct pb_buffer *pb;
uint8_t *mapped;
unsigned buffer_size;
/* Always create a buffer that is at least as large as the maximum seen IB
* size, aligned to a power of two (and multiplied by 4 to reduce internal
* fragmentation if chaining is not available). Limit to 512k dwords, which
* is the largest power of two that fits into the size field of the
* INDIRECT_BUFFER packet.
@@ -639,21 +640,25 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
case IB_MAIN:
buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
break;
default:
unreachable("unhandled IB type");
}
pb = ws->base.buffer_create(&ws->base, buffer_size,
ws->info.gart_page_size,
RADEON_DOMAIN_GTT,
- RADEON_FLAG_NO_INTERPROCESS_SHARING);
+ RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ (ring_type == RING_GFX ||
+ ring_type == RING_COMPUTE ||
+ ring_type == RING_DMA ?
+ RADEON_FLAG_GTT_WC : 0));
if (!pb)
return false;
mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
if (!mapped) {
pb_reference(&pb, NULL);
return false;
}
pb_reference(&ib->big_ib_buffer, pb);
@@ -709,21 +714,21 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
ib->max_ib_size = ib->max_ib_size - ib->max_ib_size / 32;
ib->base.prev_dw = 0;
ib->base.num_prev = 0;
ib->base.current.cdw = 0;
ib->base.current.buf = NULL;
/* Allocate a new buffer for IBs if the current buffer is all used. */
if (!ib->big_ib_buffer ||
ib->used_ib_space + ib_size > ib->big_ib_buffer->size) {
- if (!amdgpu_ib_new_buffer(aws, ib))
+ if (!amdgpu_ib_new_buffer(aws, ib, cs->ring_type))
return false;
}
info->va_start = amdgpu_winsys_bo(ib->big_ib_buffer)->va + ib->used_ib_space;
info->ib_bytes = 0;
/* ib_bytes is in dwords and the conversion to bytes will be done before
* the CS ioctl. */
ib->ptr_ib_size = &info->ib_bytes;
ib->ptr_ib_size_inside_ib = false;
@@ -919,21 +924,21 @@ static bool amdgpu_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
new_prev = REALLOC(rcs->prev,
sizeof(*new_prev) * rcs->max_prev,
sizeof(*new_prev) * new_max_prev);
if (!new_prev)
return false;
rcs->prev = new_prev;
rcs->max_prev = new_max_prev;
}
- if (!amdgpu_ib_new_buffer(cs->ctx->ws, ib))
+ if (!amdgpu_ib_new_buffer(cs->ctx->ws, ib, cs->ring_type))
return false;
assert(ib->used_ib_space == 0);
va = amdgpu_winsys_bo(ib->big_ib_buffer)->va;
/* This space was originally reserved. */
rcs->current.max_dw += 4;
assert(ib->used_ib_space + 4 * rcs->current.max_dw <= ib->big_ib_buffer->size);
/* Pad with NOPs and add INDIRECT_BUFFER packet */
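As an aside, the sizing comment at the top of amdgpu_ib_new_buffer (first
hunk) can be restated as a standalone sketch, assuming the rule works as
described there (the helper name is hypothetical; util_next_power_of_two
and MIN2 are existing Mesa utilities from util/u_math.h):

#include "util/u_math.h"

/* IB buffer size in dwords: at least the maximum IB size seen so far,
 * rounded up to a power of two, multiplied by 4 when chaining is not
 * available, and capped at 512k dwords (the largest power of two that
 * fits into the size field of the INDIRECT_BUFFER packet). */
static unsigned ib_buffer_size_dw(unsigned max_ib_size_dw, bool has_chaining)
{
   unsigned size_dw = util_next_power_of_two(max_ib_size_dw);

   if (!has_chaining)
      size_dw *= 4; /* reduce internal fragmentation */

   return MIN2(size_dw, 512 * 1024);
}

For example, with chaining available and a maximum seen IB of 100000 dwords,
this allocates 131072 dwords (512 KiB).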
--
2.7.4