[Mesa-dev] [PATCH v2 24/25] radeonsi: disable SDMA clears and copies for sparse buffers
Nicolai Hähnle
nhaehnle at gmail.com
Tue Mar 28 09:12:14 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
VM faults cannot be disabled for SDMA on VI and older (<= VI), so SDMA
must not be used on sparse buffers, which may contain uncommitted ranges.
We could still use SDMA by asking the winsys which parts of the
buffers are committed. This is left as a potential future improvement.
---
src/gallium/drivers/radeonsi/cik_sdma.c | 7 +++++--
src/gallium/drivers/radeonsi/si_cp_dma.c | 1 +
src/gallium/drivers/radeonsi/si_dma.c | 7 +++++--
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index bee35cd..90f4f21 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -73,21 +73,22 @@ static void cik_sdma_clear_buffer(struct pipe_context *ctx,
struct pipe_resource *dst,
uint64_t offset,
uint64_t size,
unsigned clear_value)
{
struct si_context *sctx = (struct si_context *)ctx;
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
unsigned i, ncopy, csize;
struct r600_resource *rdst = r600_resource(dst);
- if (!cs || offset % 4 != 0 || size % 4 != 0) {
+ if (!cs || offset % 4 != 0 || size % 4 != 0 ||
+ dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
return;
}
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
util_range_add(&rdst->valid_buffer_range, offset, offset + size);
offset += rdst->gpu_address;
@@ -519,21 +520,23 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
static void cik_sdma_copy(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct si_context *sctx = (struct si_context *)ctx;
- if (!sctx->b.dma.cs)
+ if (!sctx->b.dma.cs ||
+ src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+ dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
goto fallback;
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
return;
}
if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box))
return;
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 812fcbc..f75ce05 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -195,20 +195,21 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
util_range_add(&rdst->valid_buffer_range, offset,
offset + dma_clear_size);
/* dma_clear_buffer can use clear_buffer on failure. Make sure that
* doesn't happen. We don't want an infinite recursion: */
if (sctx->b.dma.cs &&
+ !(dst->flags & PIPE_RESOURCE_FLAG_SPARSE) &&
(offset % 4 == 0) &&
/* CP DMA is very slow. Always use SDMA for big clears. This
* alone improves DeusEx:MD performance by 70%. */
(size > 128 * 1024 ||
/* Buffers not used by the GFX IB yet will be cleared by SDMA.
* This happens to move most buffer clears to SDMA, including
* DCC and CMASK clears, because pipe->clear clears them before
* si_emit_framebuffer_state (in a draw call) adds them.
* For example, DeusEx:MD has 21 buffer clears per frame and all
* of them are moved to SDMA thanks to this. */
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 9dbee3a..b236161 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -82,21 +82,22 @@ static void si_dma_clear_buffer(struct pipe_context *ctx,
struct pipe_resource *dst,
uint64_t offset,
uint64_t size,
unsigned clear_value)
{
struct si_context *sctx = (struct si_context *)ctx;
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
unsigned i, ncopy, csize;
struct r600_resource *rdst = r600_resource(dst);
- if (!cs || offset % 4 != 0 || size % 4 != 0) {
+ if (!cs || offset % 4 != 0 || size % 4 != 0 ||
+ dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
return;
}
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
util_range_add(&rdst->valid_buffer_range, offset, offset + size);
offset += rdst->gpu_address;
@@ -226,21 +227,23 @@ static void si_dma_copy(struct pipe_context *ctx,
const struct pipe_box *src_box)
{
struct si_context *sctx = (struct si_context *)ctx;
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode;
unsigned src_w, dst_w;
unsigned src_x, src_y;
unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
- if (sctx->b.dma.cs == NULL) {
+ if (sctx->b.dma.cs == NULL ||
+ src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+ dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
goto fallback;
}
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
si_dma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
return;
}
/* XXX: Using the asynchronous DMA engine for multi-dimensional
* operations seems to cause random GPU lockups for various people.
--
2.9.3
More information about the mesa-dev mailing list