[Mesa-dev] [PATCH 14/33] intel/blorp: Add an entrypoint for doing bit-for-bit copies
Nanley Chery
nanleychery at gmail.com
Tue Sep 6 23:12:34 UTC 2016
On Wed, Aug 31, 2016 at 02:22:33PM -0700, Jason Ekstrand wrote:
> ---
> src/intel/blorp/blorp.h | 10 ++++
> src/intel/blorp/blorp_blit.c | 133 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 143 insertions(+)
>
> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> index c1e93fd..6574124 100644
> --- a/src/intel/blorp/blorp.h
> +++ b/src/intel/blorp/blorp.h
> @@ -109,6 +109,16 @@ blorp_blit(struct blorp_batch *batch,
> uint32_t filter, bool mirror_x, bool mirror_y);
>
> void
> +blorp_copy(struct blorp_batch *batch,
> + const struct blorp_surf *src_surf,
> + unsigned src_level, unsigned src_layer,
> + const struct blorp_surf *dst_surf,
> + unsigned dst_level, unsigned dst_layer,
> + uint32_t src_x, uint32_t src_y,
> + uint32_t dst_x, uint32_t dst_y,
> + uint32_t src_width, uint32_t src_height);
> +
> +void
> blorp_fast_clear(struct blorp_batch *batch,
> const struct blorp_surf *surf,
> uint32_t level, uint32_t layer, enum isl_format format,
> diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
> index 3ab39a3..42a502c 100644
> --- a/src/intel/blorp/blorp_blit.c
> +++ b/src/intel/blorp/blorp_blit.c
> @@ -1685,3 +1685,136 @@ blorp_blit(struct blorp_batch *batch,
> dst_x0, dst_y0, dst_x1, dst_y1,
> mirror_x, mirror_y);
> }
> +
> +static enum isl_format
> +get_copy_format_for_bpb(unsigned bpb)
> +{
> + /* The choice of UNORM and UINT formats is very intentional here. Most of
> + * the time, we want to use a UINT format to avoid any rounding error in
> + * the blit. For stencil blits, R8_UINT is required by the hardware.
> + * (It's the only format allowed in conjunction with W-tiling.) Also we
> + * intentionally use the 4-channel formats whenever we can. This is so
> + * that, when we do a RGB <-> RGBX copy, the two formats will line up even
> + * though one of them is 3/4 the size of the other. The choice of UNORM
> + * vs. UINT is also very intentional because Haswell doesn't handle 8 or
> + * 16-bit RGB UINT formats at all so we have to use UNORM there.
> + * Fortunately, the only time we should ever use two different formats in
> + * the table below is for RGB -> RGBA blits and so we will never have any
> + * UNORM/UINT mismatch.
> + */
> + switch (bpb) {
> + case 8: return ISL_FORMAT_R8_UINT;
> + case 16: return ISL_FORMAT_R8G8_UINT;
> + case 24: return ISL_FORMAT_R8G8B8_UNORM;
> + case 32: return ISL_FORMAT_R8G8B8A8_UNORM;
> + case 48: return ISL_FORMAT_R16G16B16_UNORM;
> + case 64: return ISL_FORMAT_R16G16B16A16_UNORM;
> + case 96: return ISL_FORMAT_R32G32B32_UINT;
> + case 128:return ISL_FORMAT_R32G32B32A32_UINT;
> + default:
> + unreachable("Unknown format bpb");
> + }
> +}
> +
> +static void
> +surf_convert_to_uncompressed(const struct isl_device *isl_dev,
> + struct brw_blorp_surface_info *info,
> + uint32_t *x, uint32_t *y,
> + uint32_t *width, uint32_t *height)
> +{
> + const struct isl_format_layout *fmtl =
> + isl_format_get_layout(info->surf.format);
> +
> + assert(fmtl->bw > 1 || fmtl->bh > 1);
> +
> + /* This is a compressed surface. We need to convert it to a single
> + * slice (becase compressed layouts don't perfectly match uncompressed
> + * ones with the same bpb) and divide x, y, width, and height by the
> + * block size.
> + */
> + surf_convert_to_single_slice(isl_dev, info);
> +
> + if (width || height) {
> + assert(*width % fmtl->bw == 0 ||
> + *x + *width == info->surf.logical_level0_px.width);
> + assert(*height % fmtl->bh == 0 ||
> + *y + *height == info->surf.logical_level0_px.height);
> + *width = DIV_ROUND_UP(*width, fmtl->bw);
> + *height = DIV_ROUND_UP(*height, fmtl->bh);
> + }
> +
> + assert(*x % fmtl->bw == 0);
> + assert(*y % fmtl->bh == 0);
> + *x /= fmtl->bw;
> + *y /= fmtl->bh;
> +
> + info->surf.logical_level0_px.width =
> + DIV_ROUND_UP(info->surf.logical_level0_px.width, fmtl->bw);
> + info->surf.logical_level0_px.height =
> + DIV_ROUND_UP(info->surf.logical_level0_px.height, fmtl->bh);
> +
> + assert(info->surf.phys_level0_sa.width % fmtl->bw == 0);
> + assert(info->surf.phys_level0_sa.height % fmtl->bh == 0);
> + info->surf.phys_level0_sa.width /= fmtl->bw;
> + info->surf.phys_level0_sa.height /= fmtl->bh;
> +
> + assert(info->tile_x_sa % fmtl->bw == 0);
> + assert(info->tile_y_sa % fmtl->bh == 0);
> + info->tile_x_sa /= fmtl->bw;
> + info->tile_y_sa /= fmtl->bh;
> +
> + /* It's now an uncompressed surface so we need an uncompressed format */
> + info->surf.format = get_copy_format_for_bpb(fmtl->bpb);
> +}
> +
> +void
> +blorp_copy(struct blorp_batch *batch,
> + const struct blorp_surf *src_surf,
> + unsigned src_level, unsigned src_layer,
> + const struct blorp_surf *dst_surf,
> + unsigned dst_level, unsigned dst_layer,
> + uint32_t src_x, uint32_t src_y,
> + uint32_t dst_x, uint32_t dst_y,
> + uint32_t src_width, uint32_t src_height)
> +{
> + struct blorp_params params;
> + blorp_params_init(¶ms);
> +
> + brw_blorp_surface_info_init(batch->blorp, ¶ms.src, src_surf, src_level,
> + src_layer, ISL_FORMAT_UNSUPPORTED, false);
> + brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, dst_surf, dst_level,
> + dst_layer, ISL_FORMAT_UNSUPPORTED, true);
> +
> + struct brw_blorp_blit_prog_key wm_prog_key;
> + memset(&wm_prog_key, 0, sizeof(wm_prog_key));
> +
> + const struct isl_format_layout *src_fmtl =
> + isl_format_get_layout(params.src.surf.format);
> + const struct isl_format_layout *dst_fmtl =
> + isl_format_get_layout(params.dst.surf.format);
> +
> + params.src.view.format = get_copy_format_for_bpb(src_fmtl->bpb);
> + if (src_fmtl->bw > 1 || src_fmtl->bh > 1) {
> + surf_convert_to_uncompressed(batch->blorp->isl_dev, ¶ms.src,
> + &src_x, &src_y, &src_width, &src_height);
> + wm_prog_key.need_dst_offset = true;
> + }
> +
> + params.dst.view.format = get_copy_format_for_bpb(dst_fmtl->bpb);
> + if (dst_fmtl->bw > 1 || dst_fmtl->bh > 1) {
> + surf_convert_to_uncompressed(batch->blorp->isl_dev, ¶ms.dst,
> + &dst_x, &dst_y, NULL, NULL);
> + wm_prog_key.need_dst_offset = true;
> + }
When this function is later used to replace meta in vkCopyImage and the like,
I suspect that copying non-zero mip levels on D16 and S8 textures can fail.
These textures have a non-4x4 alignment on SKL and, like compressed textures,
need surf_convert_to_single_slice().
One idea is to call surf_convert_to_single_slice() when ever a copy occurs
with an image's subresource that isn't the 1st mip level on the 1st array layer
/depth slice and the alignment differs from what would be used the copy format.
Thoughts?
- Nanley
> +
> + /* Once both surfaces are stompped to uncompressed as needed, the
> + * destination size is the same as the source size.
> + */
> + uint32_t dst_width = src_width;
> + uint32_t dst_height = src_height;
> +
> + do_blorp_blit(batch, ¶ms, &wm_prog_key,
> + src_x, src_y, src_x + src_width, src_y + src_height,
> + dst_x, dst_y, dst_x + dst_width, dst_y + dst_height,
> + false, false);
> +}
> --
> 2.5.0.400.gff86faf
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list