[Mesa-dev] [PATCH v2 03/10] radeonsi: restrict cube map derivative computations to the correct plane

Tue Jan 10 16:35:51 UTC 2017

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

As remarked by the comment in the original code, the old algorithm fails when
(tc + deriv) points at a different cube face. Instead, simply project the
derivative directly to the plane of the selected cube face.

The new code is based on exactly differentiating (using the chain rule)
the projection onto a plane corresponding to a fixed cube map face (which
is still selected in the usual way based on the texture coordinate itself).
The computations end up fairly involved, but we do save two reciprocal
computations.

Fixes GL45-CTS.texture_cube_map_array.sampling.

v2: add 0.5 offset to tex coords only after derivative calculation
--
A small change in si_prepare_cube_coords: we re-use coords[i] for the
derivative computation, which requires the scaled value (sc/ma) wihtout
the offset. I've locally rebased subsequent patches as well, and updated
the version at https://cgit.freedesktop.org/~nh/mesa/log/?h=cubemaps.
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 130 ++++++++++++++++++----
 1 file changed, 107 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index c0d7220..bef1afe 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -952,20 +952,76 @@ static void build_cube_intrinsic(struct gallivm_state *gallivm,
 				lp_build_const_int32(gallivm, 0), "");
 		out->stc[0] = LLVMBuildExtractElement(builder, tmp,
 				lp_build_const_int32(gallivm, 1), "");
 		out->ma = LLVMBuildExtractElement(builder, tmp,
 				lp_build_const_int32(gallivm, 2), "");
 		out->id = LLVMBuildExtractElement(builder, tmp,
 				lp_build_const_int32(gallivm, 3), "");
 	}
 }
 
+/**
+ * Build a manual selection sequence for cube face sc/tc coordinates and
+ * major axis vector (multiplied by 2 for consistency) for the given
+ * vec3 \p coords, for the face implied by \p selcoords.
+ *
+ * For the major axis, we always adjust the sign to be in the direction of
+ * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
+ * the selcoords major axis.
+ */
+static void build_cube_select(LLVMBuilderRef builder,
+			      const struct cube_selection_coords *selcoords,
+			      const LLVMValueRef *coords,
+			      LLVMValueRef *out_st,
+			      LLVMValueRef *out_ma)
+{
+	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
+	LLVMValueRef is_ma_positive;
+	LLVMValueRef sgn_ma;
+	LLVMValueRef is_ma_z, is_not_ma_z;
+	LLVMValueRef is_ma_y;
+	LLVMValueRef is_ma_x;
+	LLVMValueRef sgn;
+	LLVMValueRef tmp;
+
+	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
+		selcoords->ma, LLVMConstReal(f32, 0.0), "");
+	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
+		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
+
+	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
+	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
+	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
+		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
+	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
+
+	/* Select sc */
+	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
+		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
+	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
+
+	/* Select tc */
+	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
+	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+		LLVMConstReal(f32, -1.0), "");
+	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
+
+	/* Select ma */
+	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
+		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
+	sgn = LLVMBuildSelect(builder, is_ma_positive,
+		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
+	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+}
+
 static void si_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
 				      LLVMValueRef *in, LLVMValueRef *out)
 {
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMTypeRef type = bld_base->base.elem_type;
 	struct cube_selection_coords coords;
 	LLVMValueRef invma;
 	LLVMValueRef mad_args[3];
 
@@ -990,59 +1046,87 @@ static void si_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
 void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
 			    struct lp_build_emit_data *emit_data,
 			    LLVMValueRef *coords_arg,
 			    LLVMValueRef *derivs_arg)
 {
 
 	unsigned target = emit_data->inst->Texture.Texture;
 	unsigned opcode = emit_data->inst->Instruction.Opcode;
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
+	LLVMTypeRef type = bld_base->base.elem_type;
+	struct cube_selection_coords selcoords;
 	LLVMValueRef coords[4];
-	unsigned i;
+	LLVMValueRef invma;
 
-	si_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
+	build_cube_intrinsic(gallivm, coords_arg, &selcoords);
+
+	invma = lp_build_intrinsic(builder, "llvm.fabs.f32",
+			type, &selcoords.ma, 1, LP_FUNC_ATTR_READNONE);
+	invma = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, invma);
+
+	for (int i = 0; i < 2; ++i)
+		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
+
+	coords[2] = selcoords.id;
 
 	if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
 		LLVMValueRef derivs[4];
 		int axis;
 
 		/* Convert cube derivatives to 2D derivatives. */
 		for (axis = 0; axis < 2; axis++) {
-			LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
-
-			/* Shift the cube coordinates by the derivatives to get
-			 * the cube coordinates of the "neighboring pixel".
-			 */
-			for (i = 0; i < 3; i++)
-				shifted_cube_coords[i] =
-					LLVMBuildFAdd(builder, coords_arg[i],
-						      derivs_arg[axis*3+i], "");
-			shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
-
-			/* Project the shifted cube coordinates onto the face. */
-			si_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
-						      shifted_coords);
-
-			/* Subtract both sets of 2D coordinates to get 2D derivatives.
-			 * This won't work if the shifted coordinates ended up
-			 * in a different face.
+			LLVMValueRef deriv_st[2];
+			LLVMValueRef deriv_ma;
+
+			/* Transform the derivative alongside the texture
+			 * coordinate. Mathematically, the correct formula is
+			 * as follows. Assume we're projecting onto the +Z face
+			 * and denote by dx/dh the derivative of the (original)
+			 * X texture coordinate with respect to horizontal
+			 * window coordinates. The projection onto the +Z face
+			 * plane is:
+			 *
+			 *   f(x,z) = x/z
+			 *
+			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
+			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
+			 *
+			 * This motivatives the implementation below.
+			 *
+			 * Whether this actually gives the expected results for
+			 * apps that might feed in derivatives obtained via
+			 * finite differences is anyone's guess. The OpenGL spec
+			 * seems awfully quiet about how textureGrad for cube
+			 * maps should be handled.
 			 */
-			for (i = 0; i < 2; i++)
+			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+					  deriv_st, &deriv_ma);
+
+			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
+
+			for (int i = 0; i < 2; ++i)
 				derivs[axis * 2 + i] =
-					LLVMBuildFSub(builder, shifted_coords[i],
-						      coords[i], "");
+					LLVMBuildFSub(builder,
+						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
+						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
 		}
 
 		memcpy(derivs_arg, derivs, sizeof(derivs));
 	}
 
+	/* Shift the texture coordinate. This must be applied after the
+	 * derivative calculation.
+	 */
+	for (int i = 0; i < 2; ++i)
+		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(type, 0.5), "");
+
 	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
 	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
 		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
 		/* coords_arg.w component - array_index for cube arrays */
 		coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
 						       coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
 	}
 
 	/* Preserve compare/lod/bias. Put it in coords.w. */
 	if (opcode == TGSI_OPCODE_TEX2 ||
-- 
2.7.4