Mesa (main): blorp: Add support for blorp_copy via XY_BLOCK_COPY_BLT

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jan 24 23:47:36 UTC 2022


Module: Mesa
Branch: main
Commit: 31eeb72e45be6ef943df0b60f3cd7a646fa7b349
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=31eeb72e45be6ef943df0b60f3cd7a646fa7b349

Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Mon Oct 25 19:02:42 2021 -0700

blorp: Add support for blorp_copy via XY_BLOCK_COPY_BLT

This introduces a new blorp_copy() path using the new XY_BLOCK_COPY_BLT
blitter command introduced on Tigerlake.  Unlike the blitter commands of
old, this one is actually fast and worth using.  Although it doesn't use
shaders like the rest of BLORP, we still can use some surface-munging
code from there, and BLORP also provides a nice place to put this which
is shared among the drivers.

To use the new path, set BLORP_BATCH_USE_BLITTER (much like Jordan's
recent BLORP_BATCH_USE_COMPUTE bit) and target the batch at the copy
engine.

Reviewed-by: Caio Oliveira <caio.oliveira at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14687>

---

 src/intel/blorp/blorp.h           |  10 ++
 src/intel/blorp/blorp_blit.c      |  75 ++++++++++++
 src/intel/blorp/blorp_genX_exec.h | 237 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 320 insertions(+), 2 deletions(-)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 4e4804fee75..3682a363324 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -89,6 +89,9 @@ enum blorp_batch_flags {
     * operation.
     */
    BLORP_BATCH_USE_COMPUTE = (1 << 3),
+
+   /** Use the hardware blitter to perform any operations in this batch */
+   BLORP_BATCH_USE_BLITTER = (1 << 4),
 };
 
 struct blorp_batch {
@@ -203,6 +206,13 @@ blorp_blit_supports_compute(struct blorp_context *blorp,
                             const struct isl_surf *dst_surf,
                             enum isl_aux_usage dst_aux_usage);
 
+bool
+blorp_copy_supports_blitter(struct blorp_context *blorp,
+                            const struct isl_surf *src_surf,
+                            const struct isl_surf *dst_surf,
+                            enum isl_aux_usage src_aux_usage,
+                            enum isl_aux_usage dst_aux_usage);
+
 void
 blorp_clear(struct blorp_batch *batch,
             const struct blorp_surf *surf,
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 533b636b7fb..69381d5870d 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -2455,6 +2455,61 @@ blorp_blit_supports_compute(struct blorp_context *blorp,
    }
 }
 
+static bool
+blitter_supports_aux(const struct intel_device_info *devinfo,
+                     enum isl_aux_usage aux_usage)
+{
+   switch (aux_usage) {
+   case ISL_AUX_USAGE_NONE:
+      return true;
+   case ISL_AUX_USAGE_CCS_E:
+   case ISL_AUX_USAGE_GFX12_CCS_E:
+      return devinfo->verx10 >= 125;
+   default:
+      return false;
+   }
+}
+
+bool
+blorp_copy_supports_blitter(struct blorp_context *blorp,
+                            const struct isl_surf *src_surf,
+                            const struct isl_surf *dst_surf,
+                            enum isl_aux_usage src_aux_usage,
+                            enum isl_aux_usage dst_aux_usage)
+{
+   const struct intel_device_info *devinfo = blorp->isl_dev->info;
+
+   if (devinfo->ver < 12)
+      return false;
+
+   if (dst_surf->samples > 1 || src_surf->samples > 1)
+      return false;
+
+   if (!blitter_supports_aux(devinfo, dst_aux_usage))
+      return false;
+
+   if (!blitter_supports_aux(devinfo, src_aux_usage))
+      return false;
+
+   const struct isl_format_layout *fmtl =
+      isl_format_get_layout(dst_surf->format);
+
+   if (fmtl->bpb == 96) {
+      /* XY_BLOCK_COPY_BLT mentions it doesn't support clear colors for 96bpp
+       * formats, but none of them support CCS anyway, so it's a moot point.
+       */
+      assert(src_aux_usage == ISL_AUX_USAGE_NONE);
+      assert(dst_aux_usage == ISL_AUX_USAGE_NONE);
+
+      /* We can only support linear mode for 96bpp. */
+      if (src_surf->tiling != ISL_TILING_LINEAR ||
+          dst_surf->tiling != ISL_TILING_LINEAR)
+         return false;
+   }
+
+   return true;
+}
+
 void
 blorp_blit(struct blorp_batch *batch,
            const struct blorp_surf *src_surf,
@@ -2806,6 +2861,7 @@ blorp_copy(struct blorp_batch *batch,
            uint32_t src_width, uint32_t src_height)
 {
    const struct isl_device *isl_dev = batch->blorp->isl_dev;
+   const struct intel_device_info *devinfo = isl_dev->info;
    struct blorp_params params;
 
    if (src_width == 0 || src_height == 0)
@@ -2931,6 +2987,25 @@ blorp_copy(struct blorp_batch *batch,
    uint32_t dst_width = src_width;
    uint32_t dst_height = src_height;
 
+   if (batch->flags & BLORP_BATCH_USE_BLITTER) {
+      if (devinfo->verx10 < 125) {
+         blorp_surf_convert_to_single_slice(isl_dev, &params.dst);
+         blorp_surf_convert_to_single_slice(isl_dev, &params.src);
+      }
+
+      params.x0 = dst_x;
+      params.x1 = dst_x + dst_width;
+      params.y0 = dst_y;
+      params.y1 = dst_y + dst_height;
+      params.wm_inputs.coord_transform[0].offset = dst_x - (float)src_x;
+      params.wm_inputs.coord_transform[1].offset = dst_y - (float)src_y;
+      params.wm_inputs.coord_transform[0].multiplier = 1.0f;
+      params.wm_inputs.coord_transform[1].multiplier = 1.0f;
+
+      batch->blorp->exec(batch, &params);
+      return;
+   }
+
    struct blt_coords coords = {
       .x = {
          .src0 = src_x,
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index 11d7bb6c107..24bd0d735ff 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -2295,6 +2295,232 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
    blorp_measure_end(batch, params);
 }
 
+/* -----------------------------------------------------------------------
+ * -- BLORP on blitter
+ * -----------------------------------------------------------------------
+ */
+
+#include "isl/isl_genX_helpers.h"
+
+#if GFX_VER >= 12
+static uint32_t
+xy_bcb_tiling(const struct isl_surf *surf)
+{
+   switch (surf->tiling) {
+   case ISL_TILING_LINEAR:
+      return XY_TILE_LINEAR;
+#if GFX_VERx10 >= 125
+   case ISL_TILING_X:
+      return XY_TILE_X;
+   case ISL_TILING_4:
+      return XY_TILE_4;
+   case ISL_TILING_64:
+      return XY_TILE_64;
+#else
+   case ISL_TILING_Y0:
+      return XY_TILE_Y;
+#endif
+   default:
+      unreachable("Invalid tiling for XY_BLOCK_COPY_BLT");
+   }
+}
+
+static uint32_t
+xy_color_depth(const struct isl_format_layout *fmtl)
+{
+   switch (fmtl->bpb) {
+   case 128: return XY_BPP_128_BIT;
+   case  96: return XY_BPP_96_BIT;
+   case  64: return XY_BPP_64_BIT;
+   case  32: return XY_BPP_32_BIT;
+   case  16: return XY_BPP_16_BIT;
+   case   8: return XY_BPP_8_BIT;
+   default:
+      unreachable("Invalid bpp");
+   }
+}
+#endif
+
+#if GFX_VERx10 >= 125
+static uint32_t
+xy_bcb_surf_dim(const struct isl_surf *surf)
+{
+   switch (surf->dim) {
+   case ISL_SURF_DIM_1D:
+      return XY_SURFTYPE_1D;
+   case ISL_SURF_DIM_2D:
+      return XY_SURFTYPE_2D;
+   case ISL_SURF_DIM_3D:
+      return XY_SURFTYPE_3D;
+   default:
+      unreachable("Invalid dimensionality for XY_BLOCK_COPY_BLT");
+   }
+}
+
+static uint32_t
+xy_bcb_surf_depth(const struct isl_surf *surf)
+{
+   return surf->dim == ISL_SURF_DIM_3D ? surf->logical_level0_px.depth
+                                       : surf->logical_level0_px.array_len;
+}
+
+static uint32_t
+xy_aux_mode(const struct brw_blorp_surface_info *info)
+{
+   switch (info->aux_usage) {
+   case ISL_AUX_USAGE_CCS_E:
+   case ISL_AUX_USAGE_GFX12_CCS_E:
+      return XY_CCS_E;
+   case ISL_AUX_USAGE_NONE:
+      return XY_NONE;
+   default:
+      unreachable("Unsupported aux mode");
+   }
+}
+#endif
+
+UNUSED static void
+blorp_xy_block_copy_blt(struct blorp_batch *batch,
+                        const struct blorp_params *params)
+{
+#if GFX_VER < 12
+   unreachable("Blitter is only suppotred on Gfx12+");
+#else
+   UNUSED const struct isl_device *isl_dev = batch->blorp->isl_dev;
+
+   assert(batch->flags & BLORP_BATCH_USE_BLITTER);
+   assert(!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR));
+   assert(!(batch->flags & BLORP_BATCH_PREDICATE_ENABLE));
+   assert(params->hiz_op == ISL_AUX_OP_NONE);
+
+   assert(params->num_layers == 1);
+   assert(params->dst.view.levels == 1);
+   assert(params->src.view.levels == 1);
+
+#if GFX_VERx10 < 125
+   assert(params->dst.view.base_array_layer == 0);
+   assert(params->dst.z_offset == 0);
+#endif
+
+   unsigned dst_x0 = params->x0;
+   unsigned dst_x1 = params->x1;
+   unsigned src_x0 =
+      dst_x0 - params->wm_inputs.coord_transform[0].offset;
+   ASSERTED unsigned src_x1 =
+      dst_x1 - params->wm_inputs.coord_transform[0].offset;
+   unsigned dst_y0 = params->y0;
+   unsigned dst_y1 = params->y1;
+   unsigned src_y0 =
+      dst_y0 - params->wm_inputs.coord_transform[1].offset;
+   ASSERTED unsigned src_y1 =
+      dst_y1 - params->wm_inputs.coord_transform[1].offset;
+
+   assert(src_x1 - src_x0 == dst_x1 - dst_x0);
+   assert(src_y1 - src_y0 == dst_y1 - dst_y0);
+
+   const struct isl_surf *src_surf = &params->src.surf;
+   const struct isl_surf *dst_surf = &params->dst.surf;
+
+   const struct isl_format_layout *fmtl =
+      isl_format_get_layout(params->dst.view.format);
+
+   if (fmtl->bpb == 96) {
+      assert(src_surf->tiling == ISL_TILING_LINEAR &&
+             dst_surf->tiling == ISL_TILING_LINEAR);
+   }
+
+   assert(src_surf->samples == 1);
+   assert(dst_surf->samples == 1);
+
+   unsigned dst_pitch_unit = dst_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;
+   unsigned src_pitch_unit = src_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;
+
+#if GFX_VERx10 >= 125
+   struct isl_extent3d src_align = isl_get_image_alignment(src_surf);
+   struct isl_extent3d dst_align = isl_get_image_alignment(dst_surf);
+#endif
+
+   blorp_emit(batch, GENX(XY_BLOCK_COPY_BLT), blt) {
+      blt.ColorDepth = xy_color_depth(fmtl);
+
+      blt.DestinationPitch = (dst_surf->row_pitch_B / dst_pitch_unit) - 1;
+      blt.DestinationMOCS = params->dst.addr.mocs;
+      blt.DestinationTiling = xy_bcb_tiling(dst_surf);
+      blt.DestinationX1 = dst_x0;
+      blt.DestinationY1 = dst_y0;
+      blt.DestinationX2 = dst_x1;
+      blt.DestinationY2 = dst_y1;
+      blt.DestinationBaseAddress = params->dst.addr;
+      blt.DestinationXOffset = params->dst.tile_x_sa;
+      blt.DestinationYOffset = params->dst.tile_y_sa;
+
+#if GFX_VERx10 >= 125
+      blt.DestinationSurfaceType = xy_bcb_surf_dim(dst_surf);
+      blt.DestinationSurfaceWidth = dst_surf->logical_level0_px.w - 1;
+      blt.DestinationSurfaceHeight = dst_surf->logical_level0_px.h - 1;
+      blt.DestinationSurfaceDepth = xy_bcb_surf_depth(dst_surf) - 1;
+      blt.DestinationArrayIndex =
+         params->dst.view.base_array_layer + params->dst.z_offset;
+      blt.DestinationSurfaceQPitch = isl_get_qpitch(dst_surf) >> 2;
+      blt.DestinationLOD = params->dst.view.base_level;
+      blt.DestinationMipTailStartLOD = 15;
+      blt.DestinationHorizontalAlign = isl_encode_halign(dst_align.width);
+      blt.DestinationVerticalAlign = isl_encode_valign(dst_align.height);
+      blt.DestinationDepthStencilResource = false;
+      blt.DestinationTargetMemory =
+         params->dst.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;
+
+      if (params->dst.aux_usage != ISL_AUX_USAGE_NONE) {
+         blt.DestinationAuxiliarySurfaceMode = xy_aux_mode(&params->dst);
+         blt.DestinationCompressionEnable = true;
+         blt.DestinationCompressionFormat =
+            isl_get_render_compression_format(dst_surf->format);
+         blt.DestinationClearValueEnable = !!params->dst.clear_color_addr.buffer;
+         blt.DestinationClearAddress = params->dst.clear_color_addr;
+      }
+#endif
+
+      blt.SourceX1 = src_x0;
+      blt.SourceY1 = src_y0;
+      blt.SourcePitch = (src_surf->row_pitch_B / src_pitch_unit) - 1;
+      blt.SourceMOCS = params->src.addr.mocs;
+      blt.SourceTiling = xy_bcb_tiling(src_surf);
+      blt.SourceBaseAddress = params->src.addr;
+      blt.SourceXOffset = params->src.tile_x_sa;
+      blt.SourceYOffset = params->src.tile_y_sa;
+
+#if GFX_VERx10 >= 125
+      blt.SourceSurfaceType = xy_bcb_surf_dim(src_surf);
+      blt.SourceSurfaceWidth = src_surf->logical_level0_px.w - 1;
+      blt.SourceSurfaceHeight = src_surf->logical_level0_px.h - 1;
+      blt.SourceSurfaceDepth = xy_bcb_surf_depth(src_surf) - 1;
+      blt.SourceArrayIndex =
+         params->src.view.base_array_layer + params->src.z_offset;
+      blt.SourceSurfaceQPitch = isl_get_qpitch(src_surf) >> 2;
+      blt.SourceLOD = params->src.view.base_level;
+      blt.SourceMipTailStartLOD = 15;
+      blt.SourceHorizontalAlign = isl_encode_halign(src_align.width);
+      blt.SourceVerticalAlign = isl_encode_valign(src_align.height);
+      blt.SourceDepthStencilResource = false;
+      blt.SourceTargetMemory =
+         params->src.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;
+
+      if (params->src.aux_usage != ISL_AUX_USAGE_NONE) {
+         blt.SourceAuxiliarySurfaceMode = xy_aux_mode(&params->src);
+         blt.SourceCompressionEnable = true;
+         blt.SourceCompressionFormat =
+            isl_get_render_compression_format(src_surf->format);
+         blt.SourceClearValueEnable = !!params->src.clear_color_addr.buffer;
+         blt.SourceClearAddress = params->src.clear_color_addr;
+      }
+
+      /* XeHP needs special MOCS values for the blitter */
+      blt.DestinationMOCS = isl_dev->mocs.blitter_dst;
+      blt.SourceMOCS = isl_dev->mocs.blitter_src;
+#endif
+   }
+#endif
+}
 
 /**
  * \brief Execute a blit or render pass operation.
@@ -2308,10 +2534,17 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
 static void
 blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
 {
-   if (batch->flags & BLORP_BATCH_USE_COMPUTE)
+   if (batch->flags & BLORP_BATCH_USE_BLITTER) {
+      /* Someday, if we implement clears on the blit engine, we can
+       * use params->src.enabled to determine which case we're in.
+       */
+      assert(params->src.enabled);
+      blorp_xy_block_copy_blt(batch, params);
+   } else if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
       blorp_exec_compute(batch, params);
-   else
+   } else {
       blorp_exec_3d(batch, params);
+   }
 }
 
 #endif /* BLORP_GENX_EXEC_H */



More information about the mesa-commit mailing list