Mesa (master): pan/bi: Lower cube map coordinates

Tue Nov 3 14:55:05 UTC 2020

Module: Mesa
Branch: master
Commit: 2f00f82469e0df49f08edf9ca1b4930bdf0cdf08
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f00f82469e0df49f08edf9ca1b4930bdf0cdf08

Author: Boris Brezillon <boris.brezillon at collabora.com>
Date:   Mon Nov  2 13:33:55 2020 -0500

pan/bi: Lower cube map coordinates

We need to do the transform specified in the OpenGL spec ourselves, with
some assistance from the hardware.

Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7408>

---

 src/panfrost/bifrost/bifrost_compile.c | 181 ++++++++++++++++++++++++++++++++-
 1 file changed, 176 insertions(+), 5 deletions(-)

diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 5ecfe7d46ac..206551fdcdc 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -1391,6 +1391,173 @@ bi_emit_tex_offset_ms_index(bi_context *ctx, nir_tex_instr *instr)
         return dest;
 }
 
+static void
+bi_lower_cube_coord(bi_context *ctx, unsigned coord,
+                    unsigned *face, unsigned *s, unsigned *t)
+{
+        /* Lowers the 3-component cube direction in `coord` to a packed face
+         * word (*face) and saturated 2D coordinates (*s, *t).
+         *
+         * Step 1: compute max { |x|, |y|, |z| } */
+        bi_instruction cubeface1 = {
+                .type = BI_SPECIAL_FMA,
+                .op.special = BI_SPECIAL_CUBEFACE1,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_float32,
+                .src = { coord, coord, coord },
+                .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+                .swizzle = { {0}, {1}, {2} }
+        };
+
+        /* Calculate packed exponent / face / infinity. In reality this reads
+         * the destination from cubeface1 but that's handled by lowering */
+        bi_instruction cubeface2 = {
+                .type = BI_SPECIAL_ADD,
+                .op.special = BI_SPECIAL_CUBEFACE2,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_uint32,
+                .src = { coord, coord, coord },
+                .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+                .swizzle = { {0}, {1}, {2} }
+        };
+
+        /* Select S coordinate, driven by the packed CUBEFACE2 result */
+        bi_instruction cube_ssel = {
+                .type = BI_SPECIAL_ADD,
+                .op.special = BI_SPECIAL_CUBE_SSEL,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_float32,
+                .src = { coord, coord, cubeface2.dest },
+                .src_types = { nir_type_float32, nir_type_float32, nir_type_uint32 },
+                .swizzle = { {2}, {0} } /* components 2 and 0 of coord */
+        };
+
+        /* Select T coordinate, driven by the packed CUBEFACE2 result */
+        bi_instruction cube_tsel = {
+                .type = BI_SPECIAL_ADD,
+                .op.special = BI_SPECIAL_CUBE_TSEL,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_float32,
+                .src = { coord, coord, cubeface2.dest },
+                .src_types = { nir_type_float32, nir_type_float32, nir_type_uint32 },
+                .swizzle = { {1}, {2} } /* components 1 and 2 of coord */
+        };
+
+        /* The OpenGL ES specification requires us to transform an input vector
+         * (x, y, z) to the coordinate, given the selected S/T and the major
+         * axis magnitude m = max { |x|, |y|, |z| } computed by CUBEFACE1:
+         *
+         * (1/2 ((s / m) + 1), 1/2 ((t / m) + 1))
+         *
+         * We evaluate this in an FMA-friendly form (s shown, t similar),
+         * saturating the result for correct NaN/infinity handling:
+         *
+         * fsat(s * (0.5 * (1 / m)) + 0.5). First, take the reciprocal of m. */
+
+        bi_instruction frcp = {
+                .type = BI_SPECIAL_ADD,
+                .op.special = BI_SPECIAL_FRCP,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_float32,
+                .src = { cubeface1.dest },
+                .src_types = { nir_type_float32 },
+        };
+
+        /* Calculate 0.5 * (1.0 / max{|x|, |y|, |z|}), shared by s and t */
+        bi_instruction fma1 = {
+                .type = BI_FMA,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_float32,
+                .src = { frcp.dest, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO },
+                .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+                .constant.u64 = 0x3f000000, /* 0.5f */
+        };
+
+        /* Transform the s coordinate: fsat(fma1 * s + 0.5) */
+        bi_instruction fma2 = {
+                .type = BI_FMA,
+                .outmod = BIFROST_SAT,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_float32,
+                .src = { fma1.dest, cube_ssel.dest, BIR_INDEX_CONSTANT | 0 },
+                .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+                .constant.u64 = 0x3f000000, /* 0.5f */
+        };
+
+        /* Transform the t coordinate: fsat(fma1 * t + 0.5) */
+        bi_instruction fma3 = {
+                .type = BI_FMA,
+                .outmod = BIFROST_SAT,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_float32,
+                .src = { fma1.dest, cube_tsel.dest, BIR_INDEX_CONSTANT | 0 },
+                .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+                .constant.u64 = 0x3f000000, /* 0.5f */
+        };
+
+        bi_emit(ctx, cubeface1);
+        bi_emit(ctx, cubeface2);
+        bi_emit(ctx, cube_ssel);
+        bi_emit(ctx, cube_tsel);
+        bi_emit(ctx, frcp);
+        bi_emit(ctx, fma1);
+        bi_emit(ctx, fma2);
+        bi_emit(ctx, fma3);
+
+        /* Cube face is stored in bits [29:31]; we don't apply the shift here
+         * because the TEXS_CUBE and TEXC instructions expect the face index to
+         * be at this position.
+         */
+        *face = cubeface2.dest;
+        *s = fma2.dest;
+        *t = fma3.dest;
+}
+
+static void
+texc_pack_cube_coord(bi_context *ctx, unsigned coord,
+                     unsigned *face_s, unsigned *t)
+{
+        unsigned face, s;
+        /* t is written directly by the lowering; face and s are packed below */
+        bi_lower_cube_coord(ctx, coord, &face, &s, t);
+
+        bi_instruction and1 = {
+                .type = BI_BITWISE,
+                .op.bitwise = BI_BITWISE_AND,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_uint32,
+                .src = { face, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO },
+                .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 },
+                .constant.u64 = 0xe0000000, /* keep only bits [29:31] of face */
+        };
+
+        bi_instruction and2 = {
+                .type = BI_BITWISE,
+                .op.bitwise = BI_BITWISE_AND,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_uint32,
+                .src = { s, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO },
+                .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 },
+                .constant.u64 = 0x1fffffff, /* keep only bits [0:28] of s */
+        };
+
+        bi_instruction or = {
+                .type = BI_BITWISE,
+                .op.bitwise = BI_BITWISE_OR,
+                .dest = bi_make_temp(ctx),
+                .dest_type = nir_type_uint32,
+                .src = { and1.dest, and2.dest, BIR_INDEX_ZERO },
+                .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 },
+        };
+
+        bi_emit(ctx, and1);
+        bi_emit(ctx, and2);
+        bi_emit(ctx, or);
+
+        /* Packed word: face in bits [29:31], masked s bits in [0:28] */
+        *face_s = or.dest;
+}
+
 /* Map to the main texture op used. Some of these (txd in particular) will
  * lower to multiple texture ops with different opcodes (GRDESC_DER + TEX in
  * sequence). We assume that lowering is handled elsewhere.
@@ -1507,11 +1674,15 @@ emit_texc(bi_context *ctx, nir_tex_instr *instr)
 
                 switch (instr->src[i].src_type) {
                 case nir_tex_src_coord:
-                        /* TODO: cube map descriptor */
-                        tex.src[1] = index;
-                        tex.src[2] = index;
-                        tex.swizzle[1][0] = 0;
-                        tex.swizzle[2][0] = 1;
+                        if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+                                texc_pack_cube_coord(ctx, index,
+                                                     &tex.src[1], &tex.src[2]);
+			} else {
+                                tex.src[1] = index;
+                                tex.src[2] = index;
+                                tex.swizzle[1][0] = 0;
+                                tex.swizzle[2][0] = 1;
+                        }
                         break;
 
                 case nir_tex_src_lod: