[Mesa-dev] [PATCH 13/18] winsys/amdgpu: start with smaller IBs, growing as necessary
Nicolai Hähnle
nhaehnle at gmail.com
Mon May 9 23:21:31 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
This avoids allocating giant IBs from the outset, especially for CE and DMA.
With this change, we also never flush prematurely due to the CE IB: as long
as there is space in the buffer, we will use it.
---
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 55 +++++++++++++++++++++++++------
src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 1 +
2 files changed, 46 insertions(+), 10 deletions(-)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index a318670..546f224 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -335,11 +335,31 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
return index;
}
-static bool amdgpu_ib_new_buffer(struct radeon_winsys *ws, struct amdgpu_ib *ib,
- unsigned buffer_size)
+static bool amdgpu_ib_new_buffer(struct radeon_winsys *ws, struct amdgpu_ib *ib)
{
struct pb_buffer *pb;
uint8_t *mapped;
+ unsigned buffer_size;
+
+ /* Always create a buffer that is at least as large as the largest IB
+ * seen so far (multiplied by a factor to reduce internal fragmentation),
+ * but never more than the maximum IB size supported by the hardware.
+ */
+ buffer_size = 4 << MIN2(19, 2 + util_last_bit(ib->max_ib_size));
+
+ switch (ib->ib_type) {
+ case IB_CONST_PREAMBLE:
+ buffer_size = MAX2(buffer_size, 4 * 1024);
+ break;
+ case IB_CONST:
+ buffer_size = MAX2(buffer_size, 16 * 1024 * 4);
+ break;
+ case IB_MAIN:
+ buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
+ break;
+ default:
+ unreachable("unhandled IB type");
+ }
pb = ws->buffer_create(ws, buffer_size, 4096, RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS);
@@ -370,35 +390,34 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
*/
struct amdgpu_ib *ib = NULL;
struct amdgpu_cs_ib_info *info = &cs->csc->ib[ib_type];
- unsigned buffer_size, ib_size;
+ unsigned ib_size = 0;
switch (ib_type) {
case IB_CONST_PREAMBLE:
ib = &cs->const_preamble_ib;
- buffer_size = 4 * 1024 * 4;
- ib_size = 1024 * 4;
+ ib_size = 256 * 4;
break;
case IB_CONST:
ib = &cs->const_ib;
- buffer_size = 512 * 1024 * 4;
- ib_size = 128 * 1024 * 4;
+ ib_size = 8 * 1024 * 4;
break;
case IB_MAIN:
ib = &cs->main;
- buffer_size = 128 * 1024 * 4;
- ib_size = 20 * 1024 * 4;
+ ib_size = 4 * 1024 * 4;
break;
default:
unreachable("unhandled IB type");
}
+ ib_size = MAX2(ib_size, 4 << MIN2(19, util_last_bit(ib->max_ib_size)));
+
ib->base.cdw = 0;
ib->base.buf = NULL;
/* Allocate a new buffer for IBs if the current buffer is all used. */
if (!ib->big_ib_buffer ||
ib->used_ib_space + ib_size > ib->big_ib_buffer->size) {
- if (!amdgpu_ib_new_buffer(ws, ib, buffer_size))
+ if (!amdgpu_ib_new_buffer(ws, ib))
return false;
}
@@ -408,6 +427,8 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
ib->base.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
+
+ ib_size = ib->big_ib_buffer->size - ib->used_ib_space;
ib->base.max_dw = ib_size / 4;
return true;
}
@@ -620,7 +641,17 @@ static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
static bool amdgpu_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
+ struct amdgpu_ib *ib = amdgpu_ib(rcs);
+ struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
+ unsigned requested_size = rcs->cdw + dw;
+
assert(rcs->cdw <= rcs->max_dw);
+
+ if (ib->ib_type == IB_MAIN && requested_size > 20 * 1024)
+ return false;
+
+ ib->max_ib_size = MAX2(ib->max_ib_size, requested_size);
+
return rcs->max_dw - rcs->cdw >= dw;
}
@@ -850,15 +881,19 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
/* Set IB sizes. */
cur->ib[IB_MAIN].size = cs->main.base.cdw;
cs->main.used_ib_space += cs->main.base.cdw * 4;
+ cs->main.max_ib_size = MAX2(cs->main.max_ib_size, cs->main.base.cdw);
if (cs->const_ib.ib_mapped) {
cur->ib[IB_CONST].size = cs->const_ib.base.cdw;
cs->const_ib.used_ib_space += cs->const_ib.base.cdw * 4;
+ cs->const_ib.max_ib_size = MAX2(cs->const_ib.max_ib_size, cs->const_ib.base.cdw);
}
if (cs->const_preamble_ib.ib_mapped) {
cur->ib[IB_CONST_PREAMBLE].size = cs->const_preamble_ib.base.cdw;
cs->const_preamble_ib.used_ib_space += cs->const_preamble_ib.base.cdw * 4;
+ cs->const_preamble_ib.max_ib_size =
+ MAX2(cs->const_preamble_ib.max_ib_size, cs->const_preamble_ib.base.cdw);
}
/* Create a fence. */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 25bad07..62811e9 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -64,6 +64,7 @@ struct amdgpu_ib {
struct pb_buffer *big_ib_buffer;
uint8_t *ib_mapped;
unsigned used_ib_space;
+ unsigned max_ib_size;
enum ib_type ib_type;
};
--
2.7.4
More information about the mesa-dev mailing list