Mesa (master): pan/bi: Lower cube map coordinates
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Nov 3 14:55:05 UTC 2020
Module: Mesa
Branch: master
Commit: 2f00f82469e0df49f08edf9ca1b4930bdf0cdf08
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f00f82469e0df49f08edf9ca1b4930bdf0cdf08
Author: Boris Brezillon <boris.brezillon at collabora.com>
Date: Mon Nov 2 13:33:55 2020 -0500
pan/bi: Lower cube map coordinates
We need to do the transform specified in the OpenGL spec ourselves, with
some assistance from the hardware.
Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7408>
---
src/panfrost/bifrost/bifrost_compile.c | 181 ++++++++++++++++++++++++++++++++-
1 file changed, 176 insertions(+), 5 deletions(-)
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 5ecfe7d46ac..206551fdcdc 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -1391,6 +1391,173 @@ bi_emit_tex_offset_ms_index(bi_context *ctx, nir_tex_instr *instr)
return dest;
}
+static void
+bi_lower_cube_coord(bi_context *ctx, unsigned coord,
+ unsigned *face, unsigned *s, unsigned *t)
+{
+ /* Lower (x, y, z) in coord to face/s/t. First, max { |x|, |y|, |z| } */
+ bi_instruction cubeface1 = {
+ .type = BI_SPECIAL_FMA,
+ .op.special = BI_SPECIAL_CUBEFACE1,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_float32,
+ .src = { coord, coord, coord },
+ .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+ .swizzle = { {0}, {1}, {2} } /* components 0, 1, 2 of coord */
+ };
+
+ /* Calculate packed exponent / face / infinity. In reality this reads
+ * the destination from cubeface1 but that's handled by lowering */
+ bi_instruction cubeface2 = {
+ .type = BI_SPECIAL_ADD,
+ .op.special = BI_SPECIAL_CUBEFACE2,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_uint32,
+ .src = { coord, coord, coord },
+ .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+ .swizzle = { {0}, {1}, {2} } /* components 0, 1, 2 of coord */
+ };
+
+ /* Select S from components 2 and 0 of coord, using cubeface2's result */
+ bi_instruction cube_ssel = {
+ .type = BI_SPECIAL_ADD,
+ .op.special = BI_SPECIAL_CUBE_SSEL,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_float32,
+ .src = { coord, coord, cubeface2.dest },
+ .src_types = { nir_type_float32, nir_type_float32, nir_type_uint32 },
+ .swizzle = { {2}, {0} }
+ };
+
+ /* Select T from components 1 and 2 of coord, using cubeface2's result */
+ bi_instruction cube_tsel = {
+ .type = BI_SPECIAL_ADD,
+ .op.special = BI_SPECIAL_CUBE_TSEL,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_float32,
+ .src = { coord, coord, cubeface2.dest },
+ .src_types = { nir_type_float32, nir_type_float32, nir_type_uint32 },
+ .swizzle = { {1}, {2} }
+ };
+
+ /* The OpenGL ES specification requires us to transform an input vector
+ * (x, y, z) to the coordinate, given the selected S/T and the maximum
+ * magnitude m = max{|x|, |y|, |z|} computed by cubeface1 above:
+ *
+ * (1/2 ((s / m) + 1), 1/2 ((t / m) + 1))
+ *
+ * We implement (s shown, t similar) in a form friendlier to FMA
+ * instructions, and clamp coordinates at the end for correct
+ * NaN/infinity handling:
+ *
+ * fsat(s * (0.5 * (1 / m)) + 0.5)
+ *
+ * Take the reciprocal of m, i.e. of the cubeface1 destination
+ */
+
+ bi_instruction frcp = {
+ .type = BI_SPECIAL_ADD,
+ .op.special = BI_SPECIAL_FRCP,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_float32,
+ .src = { cubeface1.dest },
+ .src_types = { nir_type_float32 },
+ };
+
+ /* Calculate 0.5 * (1.0 / max{|x|, |y|, |z|}), with a zero addend */
+ bi_instruction fma1 = {
+ .type = BI_FMA,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_float32,
+ .src = { frcp.dest, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO },
+ .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+ .constant.u64 = 0x3f000000, /* 0.5f */
+ };
+
+ /* Transform the s coordinate: fsat(fma1 * cube_ssel + 0.5) */
+ bi_instruction fma2 = {
+ .type = BI_FMA,
+ .outmod = BIFROST_SAT,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_float32,
+ .src = { fma1.dest, cube_ssel.dest, BIR_INDEX_CONSTANT | 0 },
+ .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+ .constant.u64 = 0x3f000000, /* 0.5f */
+ };
+
+ /* Transform the t coordinate: fsat(fma1 * cube_tsel + 0.5) */
+ bi_instruction fma3 = {
+ .type = BI_FMA,
+ .outmod = BIFROST_SAT,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_float32,
+ .src = { fma1.dest, cube_tsel.dest, BIR_INDEX_CONSTANT | 0 },
+ .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
+ .constant.u64 = 0x3f000000, /* 0.5f */
+ };
+
+ bi_emit(ctx, cubeface1);
+ bi_emit(ctx, cubeface2);
+ bi_emit(ctx, cube_ssel);
+ bi_emit(ctx, cube_tsel);
+ bi_emit(ctx, frcp);
+ bi_emit(ctx, fma1);
+ bi_emit(ctx, fma2);
+ bi_emit(ctx, fma3);
+
+ /* The cube face is stored in bits [29:31] of the cubeface2 result. We
+ * return that value unshifted because the TEXS_CUBE and TEXC
+ * instructions expect the face index to be at this position.
+ */
+ *face = cubeface2.dest;
+ *s = fma2.dest;
+ *t = fma3.dest;
+}
+
+static void
+texc_pack_cube_coord(bi_context *ctx, unsigned coord,
+ unsigned *face_s, unsigned *t)
+{
+ unsigned face, s; /* unpacked outputs of bi_lower_cube_coord() */
+
+ bi_lower_cube_coord(ctx, coord, &face, &s, t); /* writes *t directly */
+
+ bi_instruction and1 = {
+ .type = BI_BITWISE,
+ .op.bitwise = BI_BITWISE_AND,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_uint32,
+ .src = { face, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO },
+ .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 },
+ .constant.u64 = 0xe0000000, /* mask: bits [29:31] of face */
+ };
+
+ bi_instruction and2 = {
+ .type = BI_BITWISE,
+ .op.bitwise = BI_BITWISE_AND,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_uint32,
+ .src = { s, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO },
+ .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 },
+ .constant.u64 = 0x1fffffff, /* mask: bits [0:28] of s */
+ };
+
+ bi_instruction or = {
+ .type = BI_BITWISE,
+ .op.bitwise = BI_BITWISE_OR,
+ .dest = bi_make_temp(ctx),
+ .dest_type = nir_type_uint32,
+ .src = { and1.dest, and2.dest, BIR_INDEX_ZERO }, /* disjoint masks */
+ .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 },
+ };
+
+ bi_emit(ctx, and1);
+ bi_emit(ctx, and2);
+ bi_emit(ctx, or);
+
+ /* Packed word: cube face in bits [29:31], s bits in bits [0:28] */
+ *face_s = or.dest;
+}
+
/* Map to the main texture op used. Some of these (txd in particular) will
* lower to multiple texture ops with different opcodes (GRDESC_DER + TEX in
* sequence). We assume that lowering is handled elsewhere.
@@ -1507,11 +1674,15 @@ emit_texc(bi_context *ctx, nir_tex_instr *instr)
switch (instr->src[i].src_type) {
case nir_tex_src_coord:
- /* TODO: cube map descriptor */
- tex.src[1] = index;
- tex.src[2] = index;
- tex.swizzle[1][0] = 0;
- tex.swizzle[2][0] = 1;
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ texc_pack_cube_coord(ctx, index,
+ &tex.src[1], &tex.src[2]);
+ } else {
+ tex.src[1] = index;
+ tex.src[2] = index;
+ tex.swizzle[1][0] = 0;
+ tex.swizzle[2][0] = 1;
+ }
break;
case nir_tex_src_lod:
More information about the mesa-commit
mailing list