Mesa (main): nvc0: fix 3d images

Sun Jun 6 18:32:57 UTC 2021

Module: Mesa
Branch: main
Commit: c7e877b0bfd704935762d025c942f260184b1520
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c7e877b0bfd704935762d025c942f260184b1520

Author: Ilia Mirkin <imirkin at alum.mit.edu>
Date:   Sat May 15 23:18:52 2021 -0400

nvc0: fix 3d images

The hardware has no support for 3d image loads/stores. So present the
image as a larger 2d image and fudge the coordinates. Note that a 2d
image (in the shader) may be backed by a slice of a 3d image, so we
always have to do the coordinate adjustments for 2d as well.

This is largely copied from the nv50 support, which has the same
restriction, with extra care taken to differentiate loads (which
specify the X coordinate in bytes) and stores, which specify it in
(formatted) pixels.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10820>

---

 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 92 +++++++++++++++++++++-
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c        | 41 +++++++---
 2 files changed, 117 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index e45fc08f1b8..83634f1e34d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2415,14 +2415,100 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
    // calculate pixel offset
    if (su->op == OP_SULDP || su->op == OP_SUREDP) {
       v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless);
-      su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
+      su->setSrc(0, (src[0] = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), src[0], v)));
    }
 
    // add array layer offset
    if (su->tex.target.isArray() || su->tex.target.isCube()) {
       v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
       assert(dim > 1);
-      su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
+      su->setSrc(2, (src[2] = bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v)));
+   }
+
+   // 3d is special-cased. Note that a single "slice" of a 3d image may
+   // also be attached as 2d, so we have to do the same 3d processing for
+   // 2d as well, just in case. In order to remap a 3d image onto a 2d
+   // image, we have to retile it "by hand".
+   if (su->tex.target == TEX_TARGET_3D || su->tex.target == TEX_TARGET_2D) {
+      Value *z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
+      Value *y_size_aligned =
+         bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(),
+                    loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM_Y, su->tex.bindless),
+                    bld.loadImm(NULL, 0x0000ffff));
+      // Add the z coordinate for actual 3d-images
+      if (dim > 2)
+         src[2] = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), z, src[2]);
+      else
+         src[2] = z;
+
+      // Compute the surface parameters from tile shifts
+      Value *tile_shift[3];
+      Value *tile_extbf[3];
+      // Fetch the "real" tiling parameters of the underlying surface
+      for (int i = 0; i < 3; i++) {
+         tile_extbf[i] =
+            bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(),
+                       loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(i), su->tex.bindless),
+                       bld.loadImm(NULL, 16));
+         tile_shift[i] =
+            bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(),
+                       loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(i), su->tex.bindless),
+                       bld.loadImm(NULL, 24));
+      }
+
+      // However for load/atomics, we use byte-indexing. And for byte
+      // indexing, the X tile size is always the same. This leads to slightly
+      // better code.
+      if (su->op == OP_SULDP || su->op == OP_SUREDP) {
+         tile_extbf[0] = bld.loadImm(NULL, 0x600);
+         tile_shift[0] = bld.loadImm(NULL, 6);
+      }
+
+      // Compute the location of given coordinate, both inside the tile as
+      // well as which (linearly-laid out) tile it's in.
+      Value *coord_in_tile[3];
+      Value *tile[3];
+      for (int i = 0; i < 3; i++) {
+         coord_in_tile[i] = bld.mkOp2v(OP_EXTBF, TYPE_U32, bld.getSSA(), src[i], tile_extbf[i]);
+         tile[i] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), src[i], tile_shift[i]);
+      }
+
+      // Based on the "real" tiling parameters, compute x/y coordinates in the
+      // larger surface with 2d tiling that was supplied to the hardware. This
+      // was determined and verified with the help of the tiling pseudocode in
+      // the envytools docs.
+      //
+      // adj_x = x_coord_in_tile + x_tile * x_tile_size * z_tile_size +
+      //         z_coord_in_tile * x_tile_size
+      // adj_y = y_coord_in_tile + y_tile * y_tile_size +
+      //         z_tile * y_tile_size * y_tiles
+      //
+      // Note: STRIDE_Y = y_tile_size * y_tiles
+
+      su->setSrc(0, bld.mkOp2v(
+            OP_ADD, TYPE_U32, bld.getSSA(),
+            bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[0],
+                       bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                                  tile[0],
+                                  bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                                             tile_shift[2], tile_shift[0]))),
+            bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[2], tile_shift[0])));
+
+      su->setSrc(1, bld.mkOp2v(
+            OP_ADD, TYPE_U32, bld.getSSA(),
+            bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(),
+                       tile[2], y_size_aligned),
+            bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                       coord_in_tile[1],
+                       bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                                  tile[1], tile_shift[1]))));
+
+      if (su->tex.target == TEX_TARGET_3D) {
+         su->moveSources(3, -1);
+         su->tex.target = TEX_TARGET_2D;
+      }
    }
 
    // prevent read fault when the image is not actually bound
@@ -2438,7 +2524,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
       assert(format->components != 0);
       // make sure that the format doesn't mismatch when it's not FMT_NONE
       bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
-                TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
+                TYPE_U32, bld.loadImm(NULL, ffs(blockwidth / 8) - 1),
                 loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                 pred->getDef(0));
    }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index c574169fedb..a9b475e7e36 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -1102,19 +1102,27 @@ nvc0_set_surface_info(struct nouveau_pushbuf *push,
 
    /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
     * offset and to check if the format doesn't mismatch. */
-   info[12] = util_format_get_blocksize(view->format);
+   info[12] = ffs(util_format_get_blocksize(view->format)) - 1;
 
    if (res->base.target == PIPE_BUFFER) {
       info[0]  = address >> 8;
       info[2]  = width;
    } else {
       struct nv50_miptree *mt = nv50_miptree(&res->base);
+      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
+      unsigned z = mt->layout_3d ? view->u.tex.first_layer : 0;
+      unsigned nby = align(util_format_get_nblocksy(view->format, height),
+                           NVC0_TILE_SIZE_Y(lvl->tile_mode));
 
+      /* NOTE: this does not precisely match nve4; the values are made to be
+       * easier for the shader to consume.
+       */
       info[0]  = address >> 8;
-      info[2]  = width;
-      info[4]  = height;
+      info[2]  = (NVC0_TILE_SHIFT_X(lvl->tile_mode) - info[12]) << 24;
+      info[4]  = NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 24 | nby;
       info[5]  = mt->layer_stride >> 8;
-      info[6]  = depth;
+      info[6]  = NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 24;
+      info[7]  = z;
       info[14] = mt->ms_x;
       info[15] = mt->ms_y;
    }
@@ -1167,24 +1175,31 @@ nvc0_validate_suf(struct nvc0_context *nvc0, int s)
          } else {
             struct nv50_miptree *mt = nv50_miptree(view->resource);
             struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
-            const unsigned z = view->u.tex.first_layer;
+            unsigned adjusted_width = width, adjusted_height = height;
 
             if (mt->layout_3d) {
-               address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
-               if (depth >= 1) {
-                  pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
-                                     "3D images are not supported!");
-                  debug_printf("3D images are not supported!\n");
-               }
+               // We have to adjust the size of the 3d surface to be
+               // accessible within 2d limits. The size of each z tile goes
+               // into the x direction, while the number of z tiles goes into
+               // the y direction.
+               const unsigned nbx = util_format_get_nblocksx(view->format, width);
+               const unsigned nby = util_format_get_nblocksy(view->format, height);
+               const unsigned tsx = NVC0_TILE_SIZE_X(lvl->tile_mode);
+               const unsigned tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);
+               const unsigned tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);
+
+               adjusted_width = align(nbx, tsx / util_format_get_blocksize(view->format)) * tsz;
+               adjusted_height = align(nby, tsy) * align(depth, tsz) >> NVC0_TILE_SHIFT_Z(lvl->tile_mode);
             } else {
+               const unsigned z = view->u.tex.first_layer;
                address += mt->layer_stride * z;
             }
             address += lvl->offset;
 
             PUSH_DATAh(push, address);
             PUSH_DATA (push, address);
-            PUSH_DATA (push, width << mt->ms_x);
-            PUSH_DATA (push, height << mt->ms_y);
+            PUSH_DATA (push, adjusted_width << mt->ms_x);
+            PUSH_DATA (push, adjusted_height << mt->ms_y);
             PUSH_DATA (push, rt);
             PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
          }