Mesa (master): radeonsi: optimize access pattern for compute blits with linear textures

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri May 15 22:24:22 UTC 2020


Module: Mesa
Branch: master
Commit: b158b117e1ef69d47724f607fb5bd28389148fac
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b158b117e1ef69d47724f607fb5bd28389148fac

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Wed May  6 14:12:27 2020 -0400

radeonsi: optimize access pattern for compute blits with linear textures

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4935>

---

 src/gallium/drivers/radeonsi/si_compute_blit.c   | 21 +++++++++++++++------
 src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c |  8 +++-----
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index c167a8bc7ee..97ca936f810 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -393,6 +393,8 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
    unsigned depth = src_box->depth;
    enum pipe_format src_format = util_format_linear(src->format);
    enum pipe_format dst_format = util_format_linear(dst->format);
+   bool is_linear = ((struct si_texture*)src)->surface.is_linear ||
+                    ((struct si_texture*)dst)->surface.is_linear;
 
    assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
 
@@ -519,13 +521,20 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
       if (!sctx->cs_copy_image)
          sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx);
       ctx->bind_compute_state(ctx, sctx->cs_copy_image);
-      info.block[0] = 8;
-      info.last_block[0] = width % 8;
-      info.block[1] = 8;
-      info.last_block[1] = height % 8;
+
+      /* This is better for access over PCIe. */
+      if (is_linear) {
+         info.block[0] = 64;
+         info.block[1] = 1;
+      } else {
+         info.block[0] = 8;
+         info.block[1] = 8;
+      }
+      info.last_block[0] = width % info.block[0];
+      info.last_block[1] = height % info.block[1];
       info.block[2] = 1;
-      info.grid[0] = DIV_ROUND_UP(width, 8);
-      info.grid[1] = DIV_ROUND_UP(height, 8);
+      info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
+      info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
       info.grid[2] = depth;
    }
 
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
index d1a97c210b0..c1a150d6ab3 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
@@ -503,18 +503,16 @@ void *si_create_copy_image_compute_shader(struct pipe_context *ctx)
 {
    static const char text[] =
       "COMP\n"
-      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
-      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
-      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
       "DCL SV[0], THREAD_ID\n"
       "DCL SV[1], BLOCK_ID\n"
+      "DCL SV[2], BLOCK_SIZE\n"
       "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
       "DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
       "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw
       "DCL TEMP[0..4], LOCAL\n"
-      "IMM[0] UINT32 {8, 1, 0, 0}\n"
+
       "MOV TEMP[0].xyz, CONST[0][0].xyzw\n"
-      "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n"
+      "UMAD TEMP[1].xyz, SV[1].xyzz, SV[2].xyzz, SV[0].xyzz\n"
       "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n"
       "LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
       "MOV TEMP[4].xyz, CONST[0][1].xyzw\n"



More information about the mesa-commit mailing list