[Mesa-dev] [PATCH 4/6] i965/blit: Break blits into chunks in intel_miptree_blit

Mon Oct 24 22:29:14 UTC 2016

This allows us to blit much larger images than if we used the blitter
directly.  In particular, it gives us an almost unlimited image height
compared to the blitter's fairly limiting 32k.  We do, however, still have
a restriction on the stride of the image because handling larger strides,
while possible, is fairly difficult.

Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 65 ++++++++++++++++++++++++++--------
 1 file changed, 50 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index bc97e66..95d00d3 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -287,21 +287,56 @@ intel_miptree_blit(struct brw_context *brw,
       return false;
    }
 
-   if (!intelEmitCopyBlit(brw,
-                          src_mt->cpp,
-                          src_flip == dst_flip ? src_mt->pitch : -src_mt->pitch,
-                          src_mt->bo, src_mt->offset,
-                          src_mt->tiling,
-                          src_mt->tr_mode,
-                          dst_mt->pitch,
-                          dst_mt->bo, dst_mt->offset,
-                          dst_mt->tiling,
-                          dst_mt->tr_mode,
-                          src_x, src_y,
-                          dst_x, dst_y,
-                          width, height,
-                          logicop)) {
-      return false;
+   const enum isl_tiling src_tiling = intel_miptree_get_isl_tiling(src_mt);
+   const enum isl_tiling dst_tiling = intel_miptree_get_isl_tiling(dst_mt);
+
+   /* We need to split the blit into chunks that each fit within the blitter's
+    * restrictions.  We can't use a chunk size of 32768 because we need to
+    * ensure that src_tile_x + chunk_size fits.  We choose 16384 because it's
+    * a nice round power of two, big enough that performance won't suffer, and
+    * small enough to guarantee everything fits.
+    */
+   const uint32_t max_chunk_size = 16384;
+
+   for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
+      for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
+         const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
+         const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
+
+         uint32_t src_offset, src_tile_x, src_tile_y;
+         isl_tiling_get_intratile_offset_el(&brw->isl_dev, src_tiling,
+                                            src_mt->cpp, src_mt->pitch,
+                                            src_x + chunk_x, src_y + chunk_y,
+                                            &src_offset,
+                                            &src_tile_x, &src_tile_y);
+
+         uint32_t dst_offset, dst_tile_x, dst_tile_y;
+         isl_tiling_get_intratile_offset_el(&brw->isl_dev, dst_tiling,
+                                            dst_mt->cpp, dst_mt->pitch,
+                                            dst_x + chunk_x, dst_y + chunk_y,
+                                            &dst_offset,
+                                            &dst_tile_x, &dst_tile_y);
+
+         if (!intelEmitCopyBlit(brw,
+                                src_mt->cpp,
+                                src_flip == dst_flip ? src_mt->pitch :
+                                                       -src_mt->pitch,
+                                src_mt->bo, src_mt->offset + src_offset,
+                                src_mt->tiling,
+                                src_mt->tr_mode,
+                                dst_mt->pitch,
+                                dst_mt->bo, dst_mt->offset + dst_offset,
+                                dst_mt->tiling,
+                                dst_mt->tr_mode,
+                                src_tile_x, src_tile_y,
+                                dst_tile_x, dst_tile_y,
+                                chunk_w, chunk_h,
+                                logicop)) {
+            /* If this is ever going to fail, it will fail on the first chunk */
+            assert(chunk_x == 0 && chunk_y == 0);
+            return false;
+         }
+      }
    }
 
    /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */
-- 
2.5.0.400.gff86faf