Mesa (main): radeonsi: implement si_sdma_copy_image for gfx7+

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Oct 7 09:44:49 UTC 2021


Module: Mesa
Branch: main
Commit: 46c95047bd77f6e824e4edccad590da8a1823fb4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=46c95047bd77f6e824e4edccad590da8a1823fb4

Author: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Date:   Tue Sep  7 20:07:19 2021 +0200

radeonsi: implement si_sdma_copy_image for gfx7+

SDMA support was dropped in 1f31a216640 mainly because the
benefit of offloading some copy/clear operations to the
SDMA hardware came with large drawbacks: CPU overhead from
the sdma/gfx synchronization, and hangs.

This commit restores SDMA support for all gfx7+ chips, but
only for image copy operations.

SDMA operations won't be intertwined with gfx operations
like before. Instead, an SDMA IB will contain a single copy
at a time, and synchronization will be handled by the
winsys (based on the buffers each IB uses).

Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12763>
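
A minimal sketch of the flow described above, assuming the interfaces
introduced in the diff below; si_sdma_emit_copy is a hypothetical
placeholder for the per-generation packet emission (cik_sdma_copy_texture
/ si_sdma_v4_v5_copy_texture), and the error/feature checks of the real
si_sdma_copy_image are omitted:

#include "si_pipe.h"   /* radeonsi internals assumed available */

/* Sketch only: one copy per SDMA IB; dependencies are resolved by the
 * winsys from the buffer list instead of explicit sdma/gfx fences. */
static bool sdma_copy_flow_sketch(struct si_context *sctx,
                                  struct si_texture *dst,
                                  struct si_texture *src)
{
   /* Flush the gfx queue first so the winsys can order this SDMA
    * submission against pending gfx work touching the same buffers. */
   si_flush_gfx_cs(sctx, 0, NULL);

   /* Hypothetical helper: emit exactly one copy packet into the
    * (lazily created) sctx->sdma_cs IB. */
   if (!si_sdma_emit_copy(sctx, dst, src))
      return false;

   /* Declare the buffers used by the IB; the winsys derives the
    * required synchronization from these usages. */
   radeon_add_to_buffer_list(sctx, sctx->sdma_cs, &src->buffer, RADEON_USAGE_READ,
                             RADEON_PRIO_SAMPLER_TEXTURE);
   radeon_add_to_buffer_list(sctx, sctx->sdma_cs, &dst->buffer, RADEON_USAGE_WRITE,
                             RADEON_PRIO_SAMPLER_TEXTURE);

   /* Submit the single-copy IB right away. */
   return sctx->ws->cs_flush(sctx->sdma_cs, RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL) == 0;
}

The actual si_sdma_copy_image below additionally creates sdma_cs lazily,
rejects MSAA, mipmapped and DCC-compressed destinations, decompresses DCC
on the source for pre-GFX10 chips, and toggles secure submission for TMZ
buffers.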

---

 src/gallium/drivers/radeonsi/meson.build          |   1 +
 src/gallium/drivers/radeonsi/si_pipe.c            |   5 +
 src/gallium/drivers/radeonsi/si_pipe.h            |   4 +
 src/gallium/drivers/radeonsi/si_sdma_copy_image.c | 481 ++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_state.c           |   4 +-
 src/gallium/drivers/radeonsi/si_state.h           |   1 +
 6 files changed, 494 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build
index 79af306e29c..b4919c66c48 100644
--- a/src/gallium/drivers/radeonsi/meson.build
+++ b/src/gallium/drivers/radeonsi/meson.build
@@ -46,6 +46,7 @@ files_libradeonsi = files(
   'si_query.c',
   'si_query.h',
   'si_nir_optim.c',
+  'si_sdma_copy_image.c',
   'si_shader.c',
   'si_shader.h',
   'si_shader_internal.h',
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index c9d668e0503..a4c6f9e0cdf 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -112,6 +112,7 @@ static const struct debug_named_value radeonsi_debug_options[] = {
    {"dccstore", DBG(DCC_STORE), "Enable DCC stores"},
    {"nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA"},
    {"nofmask", DBG(NO_FMASK), "Disable MSAA compression"},
+   {"nodma", DBG(NO_DMA), "Disable SDMA-copy for DRI_PRIME"},
 
    {"tmz", DBG(TMZ), "Force allocation of scanout/depth/stencil buffer as encrypted"},
    {"sqtt", DBG(SQTT), "Enable SQTT"},
@@ -290,6 +291,10 @@ static void si_destroy_context(struct pipe_context *context)
    sctx->ws->cs_destroy(&sctx->gfx_cs);
    if (sctx->ctx)
       sctx->ws->ctx_destroy(sctx->ctx);
+   if (sctx->sdma_cs) {
+      sctx->ws->cs_destroy(sctx->sdma_cs);
+      free(sctx->sdma_cs);
+   }
 
    if (sctx->dirty_implicit_resources)
       _mesa_hash_table_destroy(sctx->dirty_implicit_resources,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5c115f33b73..705f86d8d0e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -235,6 +235,7 @@ enum
    DBG_DCC_STORE,
    DBG_NO_DCC_MSAA,
    DBG_NO_FMASK,
+   DBG_NO_DMA,
 
    DBG_TMZ,
    DBG_SQTT,
@@ -898,6 +899,7 @@ struct si_context {
    struct radeon_winsys *ws;
    struct radeon_winsys_ctx *ctx;
    struct radeon_cmdbuf gfx_cs; /* compute IB if graphics is disabled */
+   struct radeon_cmdbuf *sdma_cs;
    struct pipe_fence_handle *last_gfx_fence;
    struct si_resource *eop_bug_scratch;
    struct si_resource *eop_bug_scratch_tmz;
@@ -1428,6 +1430,8 @@ struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
 /* si_get.c */
 void si_init_screen_get_functions(struct si_screen *sscreen);
 
+bool si_sdma_copy_image(struct si_context *ctx, struct si_texture *dst, struct si_texture *src);
+
 /* si_gfx_cs.c */
 void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence);
 void si_allocate_gds(struct si_context *ctx);
diff --git a/src/gallium/drivers/radeonsi/si_sdma_copy_image.c b/src/gallium/drivers/radeonsi/si_sdma_copy_image.c
new file mode 100644
index 00000000000..3120add84e0
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_sdma_copy_image.c
@@ -0,0 +1,481 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse at freedesktop.org>
+ * Copyright 2015-2021 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_build_pm4.h"
+#include "sid.h"
+#include "util/u_memory.h"
+
+
+static
+bool si_prepare_for_sdma_copy(struct si_context *sctx, struct si_texture *dst,struct si_texture *src)
+{
+   if (dst->surface.bpe != src->surface.bpe)
+      return false;
+
+   /* MSAA: Blits don't exist in the real world. */
+   if (src->buffer.b.b.nr_samples > 1 || dst->buffer.b.b.nr_samples > 1)
+      return false;
+
+   if (dst->buffer.b.b.last_level != 0 || src->buffer.b.b.last_level != 0)
+      return false;
+
+   return true;
+}
+
+static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
+{
+   width = u_minify(width, level);
+   return DIV_ROUND_UP(width, blk_w);
+}
+
+static unsigned encode_legacy_tile_info(struct si_context *sctx, struct si_texture *tex)
+{
+   struct radeon_info *info = &sctx->screen->info;
+   unsigned tile_index = tex->surface.u.legacy.tiling_index[0];
+   unsigned macro_tile_index = tex->surface.u.legacy.macro_tile_index;
+   unsigned tile_mode = info->si_tile_mode_array[tile_index];
+   unsigned macro_tile_mode = info->cik_macrotile_mode_array[macro_tile_index];
+
+   return util_logbase2(tex->surface.bpe) |
+          (G_009910_ARRAY_MODE(tile_mode) << 3) |
+          (G_009910_MICRO_TILE_MODE_NEW(tile_mode) << 8) |
+          /* Non-depth modes don't have TILE_SPLIT set. */
+          ((util_logbase2(tex->surface.u.legacy.tile_split >> 6)) << 11) |
+          (G_009990_BANK_WIDTH(macro_tile_mode) << 15) |
+          (G_009990_BANK_HEIGHT(macro_tile_mode) << 18) |
+          (G_009990_NUM_BANKS(macro_tile_mode) << 21) |
+          (G_009990_MACRO_TILE_ASPECT(macro_tile_mode) << 24) |
+          (G_009910_PIPE_CONFIG(tile_mode) << 26);
+}
+
+static
+bool si_translate_format_to_hw(struct si_context *sctx, enum pipe_format format, unsigned *hw_fmt, unsigned *hw_type)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   *hw_fmt = si_translate_colorformat(sctx->chip_class, format);
+
+   int firstchan;
+   for (firstchan = 0; firstchan < 4; firstchan++) {
+      if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
+         break;
+      }
+   }
+   if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
+      *hw_type = V_028C70_NUMBER_FLOAT;
+   } else {
+      *hw_type = V_028C70_NUMBER_UNORM;
+      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+         *hw_type = V_028C70_NUMBER_SRGB;
+      else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
+         if (desc->channel[firstchan].pure_integer) {
+            *hw_type = V_028C70_NUMBER_SINT;
+         } else {
+            assert(desc->channel[firstchan].normalized);
+            *hw_type = V_028C70_NUMBER_SNORM;
+         }
+      } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+         if (desc->channel[firstchan].pure_integer) {
+            *hw_type = V_028C70_NUMBER_UINT;
+         } else {
+            assert(desc->channel[firstchan].normalized);
+            *hw_type = V_028C70_NUMBER_UNORM;
+         }
+      } else {
+         return false;
+      }
+   }
+   return true;
+}
+
+static
+bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_texture *sdst, struct si_texture *ssrc, bool is_v5)
+{
+   unsigned bpp = sdst->surface.bpe;
+   uint64_t dst_address = sdst->buffer.gpu_address + sdst->surface.u.gfx9.surf_offset;
+   uint64_t src_address = ssrc->buffer.gpu_address + ssrc->surface.u.gfx9.surf_offset;
+   unsigned dst_pitch = sdst->surface.u.gfx9.surf_pitch;
+   unsigned src_pitch = ssrc->surface.u.gfx9.surf_pitch;
+   unsigned copy_width = DIV_ROUND_UP(ssrc->buffer.b.b.width0, ssrc->surface.blk_w);
+   unsigned copy_height = DIV_ROUND_UP(ssrc->buffer.b.b.height0, ssrc->surface.blk_h);
+
+   bool tmz = (ssrc->buffer.flags & RADEON_FLAG_ENCRYPTED);
+   assert (!tmz || (sdst->buffer.flags & RADEON_FLAG_ENCRYPTED));
+
+   /* Linear -> linear sub-window copy. */
+   if (ssrc->surface.is_linear && sdst->surface.is_linear) {
+      struct radeon_cmdbuf *cs = sctx->sdma_cs;
+
+      unsigned bytes = src_pitch * copy_height * bpp;
+
+      if (!(bytes < (1u << 22)))
+         return false;
+
+      src_address += ssrc->surface.u.gfx9.offset[0];
+      dst_address += sdst->surface.u.gfx9.offset[0];
+
+      radeon_begin(cs);
+      radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+                                  CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
+                                  (tmz ? 4 : 0)));
+      radeon_emit(bytes);
+      radeon_emit(0);
+      radeon_emit(src_address);
+      radeon_emit(src_address >> 32);
+      radeon_emit(dst_address);
+      radeon_emit(dst_address >> 32);
+      radeon_end();
+      return true;
+   }
+
+   /* Linear <-> Tiled sub-window copy */
+   if (ssrc->surface.is_linear != sdst->surface.is_linear) {
+      struct si_texture *tiled = ssrc->surface.is_linear ? sdst : ssrc;
+      struct si_texture *linear = tiled == ssrc ? sdst : ssrc;
+      unsigned tiled_width = DIV_ROUND_UP(tiled->buffer.b.b.width0, tiled->surface.blk_w);
+      unsigned tiled_height = DIV_ROUND_UP(tiled->buffer.b.b.height0, tiled->surface.blk_h);
+      unsigned linear_pitch = linear == ssrc ? src_pitch : dst_pitch;
+      unsigned linear_slice_pitch = ((uint64_t)linear->surface.u.gfx9.surf_slice_size) / bpp;
+      uint64_t tiled_address = tiled == ssrc ? src_address : dst_address;
+      uint64_t linear_address = linear == ssrc ? src_address : dst_address;
+      struct radeon_cmdbuf *cs = sctx->sdma_cs;
+      /* Only SDMA 5 supports DCC with SDMA */
+      bool dcc = vi_dcc_enabled(tiled, 0);
+      assert(!dcc || is_v5);
+      assert(tiled->buffer.b.b.depth0 == 1);
+
+      linear_address += linear->surface.u.gfx9.offset[0];
+
+      /* Check if everything fits into the bitfields */
+      if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) &&
+            linear_pitch < (1 << 14) && linear_slice_pitch < (1 << 28) &&
+            copy_width < (1 << 14) && copy_height < (1 << 14)))
+         return false;
+
+      radeon_begin(cs);
+      radeon_emit(
+         CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+                         CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW,
+                         (tmz ? 4 : 0)) |
+         dcc << 19 |
+         (is_v5 ? 0 : tiled->buffer.b.b.last_level) << 20 |
+         (linear == sdst ? 1u : 0) << 31);
+      radeon_emit((uint32_t)tiled_address | (tiled->surface.tile_swizzle << 8));
+      radeon_emit((uint32_t)(tiled_address >> 32));
+      radeon_emit(0);
+      radeon_emit(((tiled_width - 1) << 16));
+      radeon_emit((tiled_height - 1));
+      radeon_emit(util_logbase2(bpp) |
+                  tiled->surface.u.gfx9.swizzle_mode << 3 |
+                  tiled->surface.u.gfx9.resource_type << 9 |
+                  (is_v5 ? tiled->buffer.b.b.last_level : tiled->surface.u.gfx9.epitch) << 16);
+      radeon_emit((uint32_t)linear_address);
+      radeon_emit((uint32_t)(linear_address >> 32));
+      radeon_emit(0);
+      radeon_emit(((linear_pitch - 1) << 16));
+      radeon_emit(linear_slice_pitch - 1);
+      radeon_emit((copy_width - 1) | ((copy_height - 1) << 16));
+      radeon_emit(0);
+
+      if (dcc) {
+         unsigned hw_fmt, hw_type;
+         uint64_t md_address = tiled_address + tiled->surface.meta_offset;
+
+         si_translate_format_to_hw(sctx, tiled->buffer.b.b.format, &hw_fmt, &hw_type);
+
+         /* Add metadata */
+         radeon_emit((uint32_t)md_address);
+         radeon_emit((uint32_t)(md_address >> 32));
+         radeon_emit(hw_fmt |
+                     vi_alpha_is_on_msb(sctx->screen, tiled->buffer.b.b.format) << 8 |
+                     hw_type << 9 |
+                     tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
+                     V_028C78_MAX_BLOCK_SIZE_256B << 26 |
+                     tmz << 29 |
+                     tiled->surface.u.gfx9.color.dcc.pipe_aligned << 31);
+      }
+      radeon_end();
+      return true;
+   }
+
+   return false;
+}
+
+static
+bool cik_sdma_copy_texture(struct si_context *sctx, struct si_texture *sdst, struct si_texture *ssrc)
+{
+   struct radeon_info *info = &sctx->screen->info;
+   unsigned bpp = sdst->surface.bpe;
+   uint64_t dst_address = sdst->buffer.gpu_address + sdst->surface.u.legacy.level[0].offset_256B * 256;
+   uint64_t src_address = ssrc->buffer.gpu_address + ssrc->surface.u.legacy.level[0].offset_256B * 256;
+   unsigned dst_mode = sdst->surface.u.legacy.level[0].mode;
+   unsigned src_mode = ssrc->surface.u.legacy.level[0].mode;
+   unsigned dst_tile_index = sdst->surface.u.legacy.tiling_index[0];
+   unsigned src_tile_index = ssrc->surface.u.legacy.tiling_index[0];
+   unsigned dst_tile_mode = info->si_tile_mode_array[dst_tile_index];
+   unsigned src_tile_mode = info->si_tile_mode_array[src_tile_index];
+   unsigned dst_micro_mode = G_009910_MICRO_TILE_MODE_NEW(dst_tile_mode);
+   unsigned src_micro_mode = G_009910_MICRO_TILE_MODE_NEW(src_tile_mode);
+   unsigned dst_tile_swizzle = dst_mode == RADEON_SURF_MODE_2D ? sdst->surface.tile_swizzle : 0;
+   unsigned src_tile_swizzle = src_mode == RADEON_SURF_MODE_2D ? ssrc->surface.tile_swizzle : 0;
+   unsigned dst_pitch = sdst->surface.u.legacy.level[0].nblk_x;
+   unsigned src_pitch = ssrc->surface.u.legacy.level[0].nblk_x;
+   uint64_t dst_slice_pitch =
+      ((uint64_t)sdst->surface.u.legacy.level[0].slice_size_dw * 4) / bpp;
+   uint64_t src_slice_pitch =
+      ((uint64_t)ssrc->surface.u.legacy.level[0].slice_size_dw * 4) / bpp;
+   unsigned dst_width = minify_as_blocks(sdst->buffer.b.b.width0, 0, sdst->surface.blk_w);
+   unsigned src_width = minify_as_blocks(ssrc->buffer.b.b.width0, 0, ssrc->surface.blk_w);
+   unsigned copy_width = DIV_ROUND_UP(ssrc->buffer.b.b.width0, ssrc->surface.blk_w);
+   unsigned copy_height = DIV_ROUND_UP(ssrc->buffer.b.b.height0, ssrc->surface.blk_h);
+
+   dst_address |= dst_tile_swizzle << 8;
+   src_address |= src_tile_swizzle << 8;
+
+   /* Linear -> linear sub-window copy. */
+   if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED && src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
+       /* check if everything fits into the bitfields */
+       src_pitch <= (1 << 14) && dst_pitch <= (1 << 14) && src_slice_pitch <= (1 << 28) &&
+       dst_slice_pitch <= (1 << 28) && copy_width <= (1 << 14) && copy_height <= (1 << 14) &&
+       /* HW limitation - GFX7: */
+       (sctx->chip_class != GFX7 ||
+        (copy_width < (1 << 14) && copy_height < (1 << 14))) &&
+       /* HW limitation - some GFX7 parts: */
+       ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI) ||
+        (copy_width != (1 << 14) && copy_height != (1 << 14)))) {
+      struct radeon_cmdbuf *cs = sctx->sdma_cs;
+
+      radeon_begin(cs);
+      radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
+                  (util_logbase2(bpp) << 29));
+      radeon_emit(src_address);
+      radeon_emit(src_address >> 32);
+      radeon_emit(0);
+      radeon_emit((src_pitch - 1) << 16);
+      radeon_emit(src_slice_pitch - 1);
+      radeon_emit(dst_address);
+      radeon_emit(dst_address >> 32);
+      radeon_emit(0);
+      radeon_emit((dst_pitch - 1) << 16);
+      radeon_emit(dst_slice_pitch - 1);
+      if (sctx->chip_class == GFX7) {
+         radeon_emit(copy_width | (copy_height << 16));
+         radeon_emit(0);
+      } else {
+         radeon_emit((copy_width - 1) | ((copy_height - 1) << 16));
+         radeon_emit(0);
+      }
+      radeon_end();
+      return true;
+   }
+
+   /* Tiled <-> linear sub-window copy. */
+   if ((src_mode >= RADEON_SURF_MODE_1D) != (dst_mode >= RADEON_SURF_MODE_1D)) {
+      struct si_texture *tiled = src_mode >= RADEON_SURF_MODE_1D ? ssrc : sdst;
+      struct si_texture *linear = tiled == ssrc ? sdst : ssrc;
+      unsigned tiled_width = tiled == ssrc ? src_width : dst_width;
+      unsigned linear_width = linear == ssrc ? src_width : dst_width;
+      unsigned tiled_pitch = tiled == ssrc ? src_pitch : dst_pitch;
+      unsigned linear_pitch = linear == ssrc ? src_pitch : dst_pitch;
+      unsigned tiled_slice_pitch = tiled == ssrc ? src_slice_pitch : dst_slice_pitch;
+      unsigned linear_slice_pitch = linear == ssrc ? src_slice_pitch : dst_slice_pitch;
+      uint64_t tiled_address = tiled == ssrc ? src_address : dst_address;
+      uint64_t linear_address = linear == ssrc ? src_address : dst_address;
+      unsigned tiled_micro_mode = tiled == ssrc ? src_micro_mode : dst_micro_mode;
+
+      assert(tiled_pitch % 8 == 0);
+      assert(tiled_slice_pitch % 64 == 0);
+      unsigned pitch_tile_max = tiled_pitch / 8 - 1;
+      unsigned slice_tile_max = tiled_slice_pitch / 64 - 1;
+      unsigned xalign = MAX2(1, 4 / bpp);
+      unsigned copy_width_aligned = copy_width;
+
+      /* If the region ends at the last pixel and is unaligned, we
+       * can copy the remainder of the line that is not visible to
+       * make it aligned.
+       */
+      if (copy_width % xalign != 0 && 0 + copy_width == linear_width &&
+          copy_width == tiled_width &&
+          align(copy_width, xalign) <= linear_pitch &&
+          align(copy_width, xalign) <= tiled_pitch)
+         copy_width_aligned = align(copy_width, xalign);
+
+      /* HW limitations. */
+      if ((sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KAVERI) &&
+          linear_pitch - 1 == 0x3fff && bpp == 16)
+         return false;
+
+      if ((sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KAVERI ||
+           sctx->family == CHIP_KABINI) &&
+          (copy_width == (1 << 14) || copy_height == (1 << 14)))
+         return false;
+
+      /* The hw can read outside of the given linear buffer bounds,
+       * or access those pages but not touch the memory in case
+       * of writes. (it still causes a VM fault)
+       *
+       * Out-of-bounds memory access or page directory access must
+       * be prevented.
+       */
+      int64_t start_linear_address, end_linear_address;
+      unsigned granularity;
+
+      /* Deduce the size of reads from the linear surface. */
+      switch (tiled_micro_mode) {
+      case V_009910_ADDR_SURF_DISPLAY_MICRO_TILING:
+         granularity = bpp == 1 ? 64 / (8 * bpp) : 128 / (8 * bpp);
+         break;
+      case V_009910_ADDR_SURF_THIN_MICRO_TILING:
+      case V_009910_ADDR_SURF_DEPTH_MICRO_TILING:
+         if (0 /* TODO: THICK microtiling */)
+            granularity =
+               bpp == 1 ? 32 / (8 * bpp)
+                        : bpp == 2 ? 64 / (8 * bpp) : bpp <= 8 ? 128 / (8 * bpp) : 256 / (8 * bpp);
+         else
+            granularity = bpp <= 2 ? 64 / (8 * bpp) : bpp <= 8 ? 128 / (8 * bpp) : 256 / (8 * bpp);
+         break;
+      default:
+         return false;
+      }
+
+      /* The linear reads start at tiled_x & ~(granularity - 1).
+       * If linear_x == 0 && tiled_x % granularity != 0, the hw
+       * starts reading from an address preceding linear_address!!!
+       */
+      start_linear_address =
+         linear->surface.u.legacy.level[0].offset_256B * 256;
+
+      end_linear_address =
+         linear->surface.u.legacy.level[0].offset_256B * 256 +
+         bpp * ((copy_height - 1) * linear_pitch + copy_width);
+
+      if ((0 + copy_width) % granularity)
+         end_linear_address += granularity - (0 + copy_width) % granularity;
+
+      if (start_linear_address < 0 || end_linear_address > linear->surface.surf_size)
+         return false;
+
+      /* Check requirements. */
+      if (tiled_address % 256 == 0 && linear_address % 4 == 0 && linear_pitch % xalign == 0 &&
+          copy_width_aligned % xalign == 0 &&
+          tiled_micro_mode != V_009910_ADDR_SURF_ROTATED_MICRO_TILING &&
+          /* check if everything fits into the bitfields */
+          tiled->surface.u.legacy.tile_split <= 4096 && pitch_tile_max < (1 << 11) &&
+          slice_tile_max < (1 << 22) && linear_pitch <= (1 << 14) &&
+          linear_slice_pitch <= (1 << 28) && copy_width_aligned <= (1 << 14) &&
+          copy_height <= (1 << 14)) {
+         struct radeon_cmdbuf *cs = sctx->sdma_cs;
+         uint32_t direction = linear == sdst ? 1u << 31 : 0;
+
+         radeon_begin(cs);
+         radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+                                     CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) |
+                     direction);
+         radeon_emit(tiled_address);
+         radeon_emit(tiled_address >> 32);
+         radeon_emit(0);
+         radeon_emit(pitch_tile_max << 16);
+         radeon_emit(slice_tile_max);
+         radeon_emit(encode_legacy_tile_info(sctx, tiled));
+         radeon_emit(linear_address);
+         radeon_emit(linear_address >> 32);
+         radeon_emit(0);
+         radeon_emit(((linear_pitch - 1) << 16));
+         radeon_emit(linear_slice_pitch - 1);
+         if (sctx->chip_class == GFX7) {
+            radeon_emit(copy_width_aligned | (copy_height << 16));
+            radeon_emit(1);
+         } else {
+            radeon_emit((copy_width_aligned - 1) | ((copy_height - 1) << 16));
+            radeon_emit(0);
+         }
+         radeon_end();
+         return true;
+      }
+   }
+
+   return false;
+}
+
+bool si_sdma_copy_image(struct si_context *sctx, struct si_texture *dst, struct si_texture *src)
+{
+   struct radeon_winsys *ws = sctx->ws;
+
+   if (!sctx->sdma_cs) {
+      if (sctx->screen->debug_flags & DBG(NO_DMA) || sctx->chip_class < GFX7)
+         return false;
+
+      sctx->sdma_cs = CALLOC_STRUCT(radeon_cmdbuf);
+      if (ws->cs_create(sctx->sdma_cs, sctx->ctx, RING_DMA,
+                        NULL, NULL, true))
+         return false;
+   }
+
+   if (!si_prepare_for_sdma_copy(sctx, dst, src))
+      return false;
+
+   /* Decompress DCC on older chips */
+   if (vi_dcc_enabled(src, 0) && sctx->chip_class < GFX10)
+      si_decompress_dcc(sctx, src);
+   /* TODO: DCC compression is possible on GFX10+. See si_set_mutable_tex_desc_fields for
+    * additional constraints.
+    * For now, the only use-case of SDMA is DRI_PRIME tiled->linear copy, so this is not
+    * implemented. */
+   if (vi_dcc_enabled(dst, 0))
+      return false;
+
+   /* Always flush the gfx queue to get the winsys to handle the dependencies for us. */
+   si_flush_gfx_cs(sctx, 0, NULL);
+
+   switch (sctx->chip_class) {
+      case GFX7:
+      case GFX8:
+         if (!cik_sdma_copy_texture(sctx, dst, src))
+            return false;
+         break;
+      case GFX9:
+      case GFX10:
+      case GFX10_3:
+         if (!si_sdma_v4_v5_copy_texture(sctx, dst, src, sctx->chip_class >= GFX10))
+            return false;
+         break;
+      default:
+         return false;
+   }
+
+   radeon_add_to_buffer_list(sctx, sctx->sdma_cs, &src->buffer, RADEON_USAGE_READ,
+                             RADEON_PRIO_SAMPLER_TEXTURE);
+   radeon_add_to_buffer_list(sctx, sctx->sdma_cs, &dst->buffer, RADEON_USAGE_WRITE,
+                             RADEON_PRIO_SAMPLER_TEXTURE);
+
+   unsigned flags = RADEON_FLUSH_START_NEXT_GFX_IB_NOW;
+   if (unlikely(radeon_uses_secure_bos(sctx->ws))) {
+      if ((bool) (src->buffer.flags & RADEON_FLAG_ENCRYPTED) !=
+          sctx->ws->cs_is_secure(sctx->sdma_cs)) {
+         flags = RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION;
+      }
+   }
+
+   return ws->cs_flush(sctx->sdma_cs, flags, NULL) == 0;
+}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8b02e79437c..03ed99ce56c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1560,8 +1560,8 @@ static void si_emit_db_render_state(struct si_context *sctx)
 /*
  * format translation
  */
-static uint32_t si_translate_colorformat(enum chip_class chip_class,
-                                         enum pipe_format format)
+uint32_t si_translate_colorformat(enum chip_class chip_class,
+                                  enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
    if (!desc)
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index bf2bc8ef811..931230260a4 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -527,6 +527,7 @@ struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap, uns
 void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
 void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf);
 /* si_state.c */
+uint32_t si_translate_colorformat(enum chip_class chip_class, enum pipe_format format);
 void si_init_state_compute_functions(struct si_context *sctx);
 void si_init_state_functions(struct si_context *sctx);
 void si_init_screen_state_functions(struct si_screen *sscreen);


