[Mesa-dev] [PATCH 5/6] i965: Attempt to blit for larger textures
Anuj Phogat
anuj.phogat at gmail.com
Tue Mar 10 14:36:42 PDT 2015
On Mon, Mar 9, 2015 at 9:43 PM, Ben Widawsky
<benjamin.widawsky at intel.com> wrote:
> The blit engine is limited to 32Kx32K transfer. In cases where we have to fall
> back to the blitter, and when trying to blit a slice of a 2d texture array, or
> face of a cube map, we don't need to transfer the entire texture.
>
> I doubt this patch will get exercised at this point since we'll always allocate
> a linear BO for huge buffers. The next patch changes that.
>
> v2: Fix NDEBUG warning
>
> v3: Rebased with new blit computation function.
> Modify computation to account for tiling constraints (Jason, Jordan)
> Use the new computation function in y adjust function (Jason, Jordan)
> Dropped slice parameter from the y adjusting function (~Jason)
> Add assert that adjusted y offset is within bounds
> Renamed and moved the helper functions "public" in intel_blit.h
>
> v3.1:
> Fixed assertion fail from v3 (Jordan)
> Remove conditional y adjusted calculation, replace with comment (Jordan + Jason)
>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
> src/mesa/drivers/dri/i965/intel_blit.c | 101 +++++++++++++++++++++++++++++++--
> src/mesa/drivers/dri/i965/intel_blit.h | 24 +++++++-
> 2 files changed, 118 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
> index c7f4cf3..832dad1 100644
> --- a/src/mesa/drivers/dri/i965/intel_blit.c
> +++ b/src/mesa/drivers/dri/i965/intel_blit.c
> @@ -130,6 +130,92 @@ set_blitter_tiling(struct brw_context *brw,
> ADVANCE_BATCH(); \
> } while (0)
>
> +/* This function returns the offset to be used by the blit operation. It may
> + * modify the y if the texture would otherwise fail to be able to perform a
> + * blit. The x offset will not need to change based on the computations made by
> + * this function.
> + *
> + * By the time we get to this function, the miptree creation code should have
> + * already determined it's possible to blit the texture, so there should never
> + * be a case where this function fails.
> + */
> +static GLuint
> +intel_miptree_get_adjusted_y_offset(struct intel_mipmap_tree *mt, uint32_t *y)
> +{
> + GLuint offset = mt->offset;
> +
> + /* Convert an input number of rows: y into 2 values: an offset (page aligned
> + * in byte units), and the remaining rows of y. The resulting 2 values will
> + * be used as parameters for a blit operation [using the HW blit engine].
> + * They will therefore conform to whatever restrictions are needed.
> + *
> + * XXX: This code assumes that LOD0 is always guaranteed to be properly
> + * aligned for the blit operation. The round down only mutates y if the LOD
> + * being adjusted isn't tile aligned. In other words, if input y is pointing
> + * to LOD0 of a slice, the adjusted y should always be 0. Similarly if input
> + * y is pointing to another LOD, and the offset happens to be tile aligned, y
> + * will again be 0.
> + *
> + * The following diagram shows how the blit parameters are modified. In the
> + * example, is is trying to blit with LOD1 from slice[x] as a surface, and
Typo: "is is trying" should read "it is trying".
> + * LOD1 is not properly tile aligned. "TA" means tile aligned. The rectangle
> + * is the BO that contains the mipmaps. There may be an offset from the start
> + * of the BO to the first slice.
> + *
> + * INPUT OUTPUT
> + * 0 +---------------------------+
> + * | | +---------------------------+
> + * offset | slice[0]...slice[x-2] | offset | +----------+ |
> + * | | | | lod0 | slice[x] |
> + * TA | +----------+ | | | | |
> + * | | lod0 | slice[x-1] | | +----------+ |
> + * | | | | y---> | +---+ +-+ |
> + * | +----------+ | | | | +-+ |
> + * | +---+ +-+ | | +---+ * |
> + * | | | +-+ | | |
> + * | +---+ * | | slice[x+1]... |
> + * | | +---------------------------+
> + * | // qpitch padding |
> + * | |
> + * TA | +----------+ |
> + * | | lod0 | slice[x] |
> + * | | | |
> + * | +----------+ |
> + * y---> | +---+ +-+ |
> + * | | | +-+ |
> + * | +---+ * |
> + * | |
> + * | slice[x+1]... |
> + * +---------------------------+
> + */
> +
> + /* The following calculation looks fancy. In the common case, slice == 0
> + * and/or the full mipmap fits within blitter constraints, it should be
> + * equivalent to the simple:
> + * return offset;
> + */
> + const long TILE_MASK =
> + mt->tiling != I915_TILING_NONE ? sysconf(_SC_PAGE_SIZE) - 1 : 0;
> + (void) TILE_MASK;
> +
> + /* Since we need to output a page aligned offset, the original offset must
> + * also be page aligned. For tiled buffers, it always should be. */
> + assert((offset & TILE_MASK) == 0);
> +
> + /* Adjust the y value to pick the nearest tile aligned mipmap row */
> + unsigned tile_aligned_row =
> + ROUND_DOWN_TO(*y, intel_blit_tile_height(mt->tiling));
> + *y -= tile_aligned_row;
> +
> + /* Convert tile aligned row to a byte offset for use by the blitter */
> + tile_aligned_row *= mt->pitch;
> + assert((tile_aligned_row & TILE_MASK) == 0);
> + offset += tile_aligned_row;
> +
> + assert(*y < intel_blit_max_height(mt->tiling));
> + return offset;
> +}
> +
> /**
> * Implements a rectangular block transfer (blit) of pixels between two
> * miptrees.
> @@ -240,22 +326,27 @@ intel_miptree_blit(struct brw_context *brw,
> dst_x += dst_image_x;
> dst_y += dst_image_y;
>
> + GLuint src_offset = intel_miptree_get_adjusted_y_offset(src_mt, &src_y);
> + GLuint dst_offset = intel_miptree_get_adjusted_y_offset(dst_mt, &dst_y);
> +
> if (src_x >= INTEL_MAX_BLIT_PITCH || dst_x >= INTEL_MAX_BLIT_PITCH ||
> - src_y >= intel_blit_max_height() ||
> - dst_y >= intel_blit_max_height()) {
> + src_y >= intel_blit_max_height(src_mt->tiling) ||
> + dst_y >= intel_blit_max_height(dst_mt->tiling)) {
> perf_debug("Falling back due to >=%dk offset [src(%d, %d) dst(%d, %d)]\n",
> src_x, src_y, dst_x, dst_y,
> - MAX2(intel_blit_max_height(), INTEL_MAX_BLIT_PITCH) >> 20);
> + MAX3(intel_blit_max_height(src_mt->tiling),
> + intel_blit_max_height(dst_mt->tiling),
> + INTEL_MAX_BLIT_PITCH) >> 20);
The shift should be >> 10, not >> 20, for this to print 32 (as in "32k").
> return false;
> }
>
> if (!intelEmitCopyBlit(brw,
> src_mt->cpp,
> src_pitch,
> - src_mt->bo, src_mt->offset,
> + src_mt->bo, src_offset,
> src_mt->tiling,
> dst_mt->pitch,
> - dst_mt->bo, dst_mt->offset,
> + dst_mt->bo, dst_offset,
> dst_mt->tiling,
> src_x, src_y,
> dst_x, dst_y,
> diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
> index 52dd67c..aff2d58 100644
> --- a/src/mesa/drivers/dri/i965/intel_blit.h
> +++ b/src/mesa/drivers/dri/i965/intel_blit.h
> @@ -78,14 +78,34 @@ void intel_emit_linear_blit(struct brw_context *brw,
> unsigned int src_offset,
> unsigned int size);
>
> +
> +/* Returns the height of the tiling format. This would be measured in scanlines
> + * (of pitch bytes)
> + */
> +static inline uint32_t
> +intel_blit_tile_height(uint32_t tiling)
> +{
> + const long PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
> + switch (tiling) {
> + case I915_TILING_X:
> + return PAGE_SIZE / 512;
> + case I915_TILING_Y:
> + return PAGE_SIZE / 128;
> + case I915_TILING_NONE:
> + return 1;
> + default:
> + unreachable("Unknown tiling format\n");
> + }
> +}
> +
> static inline uint32_t
> -intel_blit_max_height(void)
> +intel_blit_max_height(uint32_t tiling)
> {
> /* The docs say that the blitter is capable of transferring 65536 scanlines
> * per blit, however the commands we use only have a signed 16b value thus
> * making the practical limit 15b.
> */
> - return INTEL_MAX_BLIT_ROWS;
> + return INTEL_MAX_BLIT_ROWS - intel_blit_tile_height(tiling);
> }
>
> #endif
> --
> 2.3.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list