[Mesa-dev] [PATCH 12/22] anv/gpu_memcpy: Add a lighter-weight memcpy path

Nanley Chery nanleychery at gmail.com
Thu Apr 27 18:32:11 UTC 2017


We're now performing a GPU memcpy in more places to copy small amounts
of data. For copies of 16 bytes or less, add a lighter-weight path that
emits MI commands directly and thrashes less state.

Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
---
 src/intel/vulkan/genX_gpu_memcpy.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c
index 3cbc7235cf..f15c2a5f72 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -28,6 +28,8 @@
 
 #include "common/gen_l3_config.h"
 
+#define MI_PREDICATE_SRC0 0x2400
+
 /**
  * This file implements some lightweight memcpy/memset operations on the GPU
  * using a vertex buffer and streamout.
@@ -63,6 +65,42 @@ genX(cmd_buffer_gpu_memcpy)(struct anv_cmd_buffer *cmd_buffer,
    assert(dst_offset + size <= dst->size);
    assert(src_offset + size <= src->size);
 
+   /* This memcpy expects DWord aligned memory. */
+   assert(size % 4 == 0);
+   assert(dst_offset % 4 == 0);
+   assert(src_offset % 4 == 0);
+
+   /* Use a simpler memcpy operation when copying 16 bytes or less of data.
+    * This is the size of a surface state's clear value on SKL+.
+    */
+   if (size <= 16) {
+      for (uint32_t i = 0; i < size; i += 4) {
+         const struct anv_address src_addr =
+            (struct anv_address) { src, src_offset + i};
+         const struct anv_address dst_addr =
+            (struct anv_address) { dst, dst_offset + i};
+#if GEN_GEN >= 8
+         anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
+            cp.DestinationMemoryAddress = dst_addr;
+            cp.SourceMemoryAddress = src_addr;
+         }
+#else
+         /* IVB does not have a general purpose register for command streamer
+          * commands. Therefore, we use an alternate temporary register.
+          */
+         anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
+            load.RegisterAddress = MI_PREDICATE_SRC0;
+            load.MemoryAddress = src_addr;
+         }
+         anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
+            store.RegisterAddress = MI_PREDICATE_SRC0;
+            store.MemoryAddress = dst_addr;
+         }
+#endif
+      }
+      return;
+   }
+
    /* The maximum copy block size is 4 32-bit components at a time. */
    unsigned bs = 16;
    bs = gcd_pow2_u64(bs, src_offset);
-- 
2.12.2



More information about the mesa-dev mailing list