[Mesa-dev] [PATCH 12/22] anv/gpu_memcpy: Add a lighter-weight memcpy path
Nanley Chery
nanleychery at gmail.com
Thu Apr 27 18:32:11 UTC 2017
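We're now performing a GPU memcpy in more places to copy small amounts
of data. Add a path for copies of 16 bytes or less that emits MI commands
directly instead of going through the vertex buffer and streamout path,
so that less state is thrashed.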
Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
---
src/intel/vulkan/genX_gpu_memcpy.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c
index 3cbc7235cf..f15c2a5f72 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -28,6 +28,8 @@
#include "common/gen_l3_config.h"
+#define MI_PREDICATE_SRC0 0x2400
+
/**
* This file implements some lightweight memcpy/memset operations on the GPU
* using a vertex buffer and streamout.
@@ -63,6 +65,42 @@ genX(cmd_buffer_gpu_memcpy)(struct anv_cmd_buffer *cmd_buffer,
assert(dst_offset + size <= dst->size);
assert(src_offset + size <= src->size);
+ /* This memcpy expects DWord aligned memory. */
+ assert(size % 4 == 0);
+ assert(dst_offset % 4 == 0);
+ assert(src_offset % 4 == 0);
+
+ /* Use a simpler memcpy operation when copying 16 bytes or less of data.
+ * This is the size of a surface state's clear value on SKL+.
+ */
+ if (size <= 16) {
+ for (uint32_t i = 0; i < size; i += 4) {
+ const struct anv_address src_addr =
+ (struct anv_address) { src, src_offset + i};
+ const struct anv_address dst_addr =
+ (struct anv_address) { dst, dst_offset + i};
+#if GEN_GEN >= 8
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
+ cp.DestinationMemoryAddress = dst_addr;
+ cp.SourceMemoryAddress = src_addr;
+ }
+#else
+ /* IVB has no general purpose register for command streamer commands.
+ * Use MI_PREDICATE_SRC0 as a temporary instead; this clobbers it.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
+ load.RegisterAddress = MI_PREDICATE_SRC0;
+ load.MemoryAddress = src_addr;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
+ store.RegisterAddress = MI_PREDICATE_SRC0;
+ store.MemoryAddress = dst_addr;
+ }
+#endif
+ }
+ return;
+ }
+
/* The maximum copy block size is 4 32-bit components at a time. */
unsigned bs = 16;
bs = gcd_pow2_u64(bs, src_offset);
--
2.12.2
More information about the mesa-dev
mailing list