[Mesa-dev] [PATCH] radeonsi: add performance thresholds for CP DMA, decrease it for clears
Marek Olšák
maraeo at gmail.com
Sat Oct 7 18:56:17 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
The first one isn't used yet.
---
src/gallium/drivers/radeonsi/si_cp_dma.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 064f6c0..97adc27 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -21,20 +21,26 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Marek Olšák <maraeo at gmail.com>
*/
#include "si_pipe.h"
#include "sid.h"
#include "radeon/r600_cs.h"
+/* Recommended maximum sizes for optimal performance.
+ * Fall back to compute or SDMA if the size is greater.
+ */
+#define CP_DMA_COPY_PERF_THRESHOLD (64 * 1024) /* copied from Vulkan */
+#define CP_DMA_CLEAR_PERF_THRESHOLD (32 * 1024) /* guess (clear is much slower) */
+
/* Set this if you want the ME to wait until CP DMA is done.
* It should be set on the last CP DMA packet. */
#define CP_DMA_SYNC (1 << 0)
/* Set this if the source data was used as a destination in a previous CP DMA
* packet. It's for preventing a read-after-write (RAW) hazard between two
* CP DMA packets. */
#define CP_DMA_RAW_WAIT (1 << 1)
#define CP_DMA_USE_L2 (1 << 2) /* CIK+ */
#define CP_DMA_CLEAR (1 << 3)
@@ -223,21 +229,21 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
util_range_add(&rdst->valid_buffer_range, offset,
offset + dma_clear_size);
/* dma_clear_buffer can use clear_buffer on failure. Make sure that
* doesn't happen. We don't want an infinite recursion: */
if (sctx->b.dma.cs &&
!(dst->flags & PIPE_RESOURCE_FLAG_SPARSE) &&
(offset % 4 == 0) &&
/* CP DMA is very slow. Always use SDMA for big clears. This
* alone improves DeusEx:MD performance by 70%. */
- (size > 128 * 1024 ||
+ (size > CP_DMA_CLEAR_PERF_THRESHOLD ||
/* Buffers not used by the GFX IB yet will be cleared by SDMA.
* This happens to move most buffer clears to SDMA, including
* DCC and CMASK clears, because pipe->clear clears them before
* si_emit_framebuffer_state (in a draw call) adds them.
* For example, DeusEx:MD has 21 buffer clears per frame and all
* of them are moved to SDMA thanks to this. */
!ws->cs_is_buffer_referenced(sctx->b.gfx.cs, rdst->buf,
RADEON_USAGE_READWRITE))) {
sctx->b.dma_clear_buffer(ctx, dst, offset, dma_clear_size, value);
--
2.7.4
More information about the mesa-dev
mailing list