[Mesa-dev] [PATCH 5/6] i965: Attempt to blit for larger textures

Ben Widawsky benjamin.widawsky at intel.com
Mon Mar 9 21:43:35 PDT 2015


The blit engine is limited to a 32Kx32K transfer. In cases where we have to fall
back to the blitter, and when trying to blit a slice of a 2d texture array, or a
face of a cube map, we don't need to transfer the entire texture.

I doubt this patch will get exercised at this point since we'll always allocate
a linear BO for huge buffers. The next patch changes that.

v2: Fix NDEBUG warning

v3: Rebased with new blit computation function.
Modify computation to account for tiling constraints (Jason, Jordan)
Use the new computation function in y adjust function (Jason, Jordan)
Dropped slice parameter from the y adjusting function (~Jason)
Add assert that adjusted y offset is within bounds
Renamed the helper functions and made them "public" in intel_blit.h

v3.1:
Fixed assertion fail from v3 (Jordan)
Remove conditional y adjusted calculation, replace with comment (Jordan + Jason)

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 src/mesa/drivers/dri/i965/intel_blit.c | 101 +++++++++++++++++++++++++++++++--
 src/mesa/drivers/dri/i965/intel_blit.h |  24 +++++++-
 2 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index c7f4cf3..832dad1 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -130,6 +130,92 @@ set_blitter_tiling(struct brw_context *brw,
       ADVANCE_BATCH();                                                  \
    } while (0)
 
+/* This function returns the offset to be used by the blit operation. It may
+ * modify the y if the texture would otherwise fail to be able to perform a
+ * blit. The x offset will not need to change based on the computations made by
+ * this function.
+ *
+ * By the time we get to this function, the miptree creation code should have
+ * already determined it's possible to blit the texture, so there should never
+ * be a case where this function fails.
+ */
+static GLuint
+intel_miptree_get_adjusted_y_offset(struct intel_mipmap_tree *mt, uint32_t *y)
+{
+   GLuint offset = mt->offset;
+
+   /* Convert an input number of rows: y into 2 values: an offset (page aligned
+    * in byte units), and the remaining rows of y. The resulting 2 values will
+    * be used as parameters for a blit operation [using the HW blit engine].
+    * They will therefore conform to whatever restrictions are needed.
+    *
+    * XXX: This code assumes that LOD0 is always guaranteed to be properly
+    * aligned for the blit operation. The round down only mutates y if the LOD
+    * being adjusted isn't tile aligned. In other words, if input y is pointing
+    * to LOD0 of a slice, the adjusted y should always be 0. Similarly if input
+    * y is pointing to another LOD, and the offset happens to be tile aligned, y
+    * will again be 0.
+    *
+    * The following diagram shows how the blit parameters are modified. In the
+    * example, it is trying to blit with LOD1 from slice[x] as a surface, and
+    * LOD1 is not properly tile aligned.  "TA" means tile aligned. The rectangle
+    * is the BO that contains the mipmaps. There may be an offset from the start
+    * of the BO to the first slice.
+    *
+    *                   INPUT                               OUTPUT
+    *   0    +---------------------------+
+    *        |                           |        +---------------------------+
+    * offset |  slice[0]...slice[x-2]    | offset |  +----------+             |
+    *        |                           |        |  |  lod0    | slice[x]    |
+    *   TA   |  +----------+             |        |  |          |             |
+    *        |  |  lod0    | slice[x-1]  |        |  +----------+             |
+    *        |  |          |             |  y---> |  +---+ +-+                |
+    *        |  +----------+             |        |  |   | +-+                |
+    *        |  +---+ +-+                |        |  +---+ *                  |
+    *        |  |   | +-+                |        |                           |
+    *        |  +---+ *                  |        |  slice[x+1]...            |
+    *        |                           |        +---------------------------+
+    *        |  // qpitch padding        |
+    *        |                           |
+    *   TA   |  +----------+             |
+    *        |  |  lod0    | slice[x]    |
+    *        |  |          |             |
+    *        |  +----------+             |
+    *  y---> |  +---+ +-+                |
+    *        |  |   | +-+                |
+    *        |  +---+ *                  |
+    *        |                           |
+    *        |  slice[x+1]...            |
+    *        +---------------------------+
+    */
+
+   /* The following calculation looks fancy. In the common case, slice == 0
+    * and/or the full mipmap fits within blitter constraints, it should be
+    * equivalent to the simple:
+    * return offset;
+    */
+   const long TILE_MASK =
+      mt->tiling != I915_TILING_NONE ? sysconf(_SC_PAGE_SIZE) - 1 : 0;
+   (void) TILE_MASK;
+
+   /* Since we need to output a page aligned offset, the original offset must
+    * also be page aligned. For tiled buffers, it always should be. */
+   assert((offset & TILE_MASK) == 0);
+
+   /* Adjust the y value to pick the nearest tile aligned mipmap row */
+   unsigned tile_aligned_row =
+      ROUND_DOWN_TO(*y, intel_blit_tile_height(mt->tiling));
+   *y -= tile_aligned_row;
+
+   /* Convert tile aligned row to a byte offset for use by the blitter */
+   tile_aligned_row *= mt->pitch;
+   assert((tile_aligned_row & TILE_MASK) == 0);
+   offset += tile_aligned_row;
+
+   assert(*y < intel_blit_max_height(mt->tiling));
+   return offset;
+}
+
 /**
  * Implements a rectangular block transfer (blit) of pixels between two
  * miptrees.
@@ -240,22 +326,27 @@ intel_miptree_blit(struct brw_context *brw,
    dst_x += dst_image_x;
    dst_y += dst_image_y;
 
+   GLuint src_offset = intel_miptree_get_adjusted_y_offset(src_mt, &src_y);
+   GLuint dst_offset = intel_miptree_get_adjusted_y_offset(dst_mt, &dst_y);
+
    if (src_x >= INTEL_MAX_BLIT_PITCH || dst_x >= INTEL_MAX_BLIT_PITCH ||
-       src_y >= intel_blit_max_height() ||
-       dst_y >= intel_blit_max_height()) {
+       src_y >= intel_blit_max_height(src_mt->tiling) ||
+       dst_y >= intel_blit_max_height(dst_mt->tiling)) {
       perf_debug("Falling back due to >=%dk offset [src(%d, %d) dst(%d, %d)]\n",
                  src_x, src_y, dst_x, dst_y,
-                 MAX2(intel_blit_max_height(), INTEL_MAX_BLIT_PITCH) >> 20);
+                 MAX3(intel_blit_max_height(src_mt->tiling),
+                      intel_blit_max_height(dst_mt->tiling),
+                      INTEL_MAX_BLIT_PITCH) >> 20);
       return false;
    }
 
    if (!intelEmitCopyBlit(brw,
                           src_mt->cpp,
                           src_pitch,
-                          src_mt->bo, src_mt->offset,
+                          src_mt->bo, src_offset,
                           src_mt->tiling,
                           dst_mt->pitch,
-                          dst_mt->bo, dst_mt->offset,
+                          dst_mt->bo, dst_offset,
                           dst_mt->tiling,
                           src_x, src_y,
                           dst_x, dst_y,
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
index 52dd67c..aff2d58 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@@ -78,14 +78,34 @@ void intel_emit_linear_blit(struct brw_context *brw,
 			    unsigned int src_offset,
 			    unsigned int size);
 
+
+/* Returns the height of the tiling format. This would be measured in scanlines
+ * (of pitch bytes)
+ */
+static inline uint32_t
+intel_blit_tile_height(uint32_t tiling)
+{
+   const long PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
+   switch (tiling) {
+   case I915_TILING_X:
+      return PAGE_SIZE / 512;
+   case I915_TILING_Y:
+      return PAGE_SIZE / 128;
+   case I915_TILING_NONE:
+      return 1;
+   default:
+      unreachable("Unknown tiling format\n");
+   }
+}
+
 static inline uint32_t
-intel_blit_max_height(void)
+intel_blit_max_height(uint32_t tiling)
 {
    /* The docs say that the blitter is capable of transferring 65536 scanlines
     * per blit, however the commands we use only have a signed 16b value thus
     * making the practical limit 15b.
     */
-   return INTEL_MAX_BLIT_ROWS;
+   return INTEL_MAX_BLIT_ROWS - intel_blit_tile_height(tiling);
 }
 
 #endif
-- 
2.3.1



More information about the mesa-dev mailing list