[Mesa-dev] [PATCH v2 2/9] radeonsi: restrict cube map derivative computations to the correct plane

Thu Jan 12 20:38:10 UTC 2017

On Thu, Jan 12, 2017 at 4:39 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> As remarked by the comment in the original code, the old algorithm fails when
> (tc + deriv) points at a different cube face. Instead, simply project the
> derivative directly to the plane of the selected cube face.
>
> The new code is based on exactly differentiating (using the chain rule)
> the projection onto a plane corresponding to a fixed cube map face (which
> is still selected in the usual way based on the texture coordinate itself).
> The computations end up fairly involved, but we do save two reciprocal
> computations.
>
> Fixes GL45-CTS.texture_cube_map_array.sampling.
>
> v2: add 0.5 offset to tex coords only after derivative calculation
> v3: go back to 1.5 offset
>
> Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> (v2)
> ---
>  src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 130 ++++++++++++++++++----
>  1 file changed, 107 insertions(+), 23 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> index 9b8c9b0..60346cb 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
> @@ -952,20 +952,76 @@ static void build_cube_intrinsic(struct gallivm_state *gallivm,
>                                 lp_build_const_int32(gallivm, 0), "");
>                 out->stc[0] = LLVMBuildExtractElement(builder, tmp,
>                                 lp_build_const_int32(gallivm, 1), "");
>                 out->ma = LLVMBuildExtractElement(builder, tmp,
>                                 lp_build_const_int32(gallivm, 2), "");
>                 out->id = LLVMBuildExtractElement(builder, tmp,
>                                 lp_build_const_int32(gallivm, 3), "");
>         }
>  }
>
> +/**
> + * Build a manual selection sequence for cube face sc/tc coordinates and
> + * major axis vector (multiplied by 2 for consistency) for the given
> + * vec3 \p coords, for the face implied by \p selcoords.
> + *
> + * For the major axis, we always adjust the sign to be in the direction of
> + * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
> + * the selcoords major axis.
> + */
> +static void build_cube_select(LLVMBuilderRef builder,
> +                             const struct cube_selection_coords *selcoords,
> +                             const LLVMValueRef *coords,
> +                             LLVMValueRef *out_st,
> +                             LLVMValueRef *out_ma)
> +{
> +       LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
> +       LLVMValueRef is_ma_positive;
> +       LLVMValueRef sgn_ma;
> +       LLVMValueRef is_ma_z, is_not_ma_z;
> +       LLVMValueRef is_ma_y;
> +       LLVMValueRef is_ma_x;
> +       LLVMValueRef sgn;
> +       LLVMValueRef tmp;
> +
> +       is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
> +               selcoords->ma, LLVMConstReal(f32, 0.0), "");
> +       sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
> +               LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
> +
> +       is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
> +       is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
> +       is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
> +               LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
> +       is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
> +
> +       /* Select sc */
> +       tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
> +       sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
> +               LLVMBuildSelect(builder, is_ma_x, sgn_ma,
> +                       LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
> +       out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
> +
> +       /* Select tc */
> +       tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
> +       sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
> +               LLVMConstReal(f32, -1.0), "");
> +       out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
> +
> +       /* Select ma */
> +       tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
> +               LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
> +       sgn = LLVMBuildSelect(builder, is_ma_positive,
> +               LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
> +       *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
> +}
> +
>  static void si_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
>                                       LLVMValueRef *in, LLVMValueRef *out)
>  {
>         struct gallivm_state *gallivm = bld_base->base.gallivm;
>         LLVMBuilderRef builder = gallivm->builder;
>         LLVMTypeRef type = bld_base->base.elem_type;
>         struct cube_selection_coords coords;
>         LLVMValueRef invma;
>         LLVMValueRef mad_args[3];
>
> @@ -990,59 +1046,87 @@ static void si_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
>  void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
>                             struct lp_build_emit_data *emit_data,
>                             LLVMValueRef *coords_arg,
>                             LLVMValueRef *derivs_arg)
>  {
>
>         unsigned target = emit_data->inst->Texture.Texture;
>         unsigned opcode = emit_data->inst->Instruction.Opcode;
>         struct gallivm_state *gallivm = bld_base->base.gallivm;
>         LLVMBuilderRef builder = gallivm->builder;
> +       LLVMTypeRef type = bld_base->base.elem_type;
> +       struct cube_selection_coords selcoords;
>         LLVMValueRef coords[4];
> -       unsigned i;
> +       LLVMValueRef invma;
>
> -       si_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
> +       build_cube_intrinsic(gallivm, coords_arg, &selcoords);
> +
> +       invma = lp_build_intrinsic(builder, "llvm.fabs.f32",
> +                       type, &selcoords.ma, 1, LP_FUNC_ATTR_READNONE);
> +       invma = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, invma);
> +
> +       for (int i = 0; i < 2; ++i)
> +               coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
> +
> +       coords[2] = selcoords.id;
>
>         if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
>                 LLVMValueRef derivs[4];
>                 int axis;
>
>                 /* Convert cube derivatives to 2D derivatives. */
>                 for (axis = 0; axis < 2; axis++) {
> -                       LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
> -
> -                       /* Shift the cube coordinates by the derivatives to get
> -                        * the cube coordinates of the "neighboring pixel".
> -                        */
> -                       for (i = 0; i < 3; i++)
> -                               shifted_cube_coords[i] =
> -                                       LLVMBuildFAdd(builder, coords_arg[i],
> -                                                     derivs_arg[axis*3+i], "");
> -                       shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
> -
> -                       /* Project the shifted cube coordinates onto the face. */
> -                       si_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
> -                                                     shifted_coords);
> -
> -                       /* Subtract both sets of 2D coordinates to get 2D derivatives.
> -                        * This won't work if the shifted coordinates ended up
> -                        * in a different face.
> +                       LLVMValueRef deriv_st[2];
> +                       LLVMValueRef deriv_ma;
> +
> +                       /* Transform the derivative alongside the texture
> +                        * coordinate. Mathematically, the correct formula is
> +                        * as follows. Assume we're projecting onto the +Z face
> +                        * and denote by dx/dh the derivative of the (original)
> +                        * X texture coordinate with respect to horizontal
> +                        * window coordinates. The projection onto the +Z face
> +                        * plane is:
> +                        *
> +                        *   f(x,z) = x/z
> +                        *
> +                        * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
> +                        *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
> +                        *
> +                        * This motivatives the implementation below.
> +                        *
> +                        * Whether this actually gives the expected results for
> +                        * apps that might feed in derivatives obtained via
> +                        * finite differences is anyone's guess. The OpenGL spec
> +                        * seems awfully quiet about how textureGrad for cube
> +                        * maps should be handled.
>                          */
> -                       for (i = 0; i < 2; i++)
> +                       build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
> +                                         deriv_st, &deriv_ma);
> +
> +                       deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
> +
> +                       for (int i = 0; i < 2; ++i)
>                                 derivs[axis * 2 + i] =
> -                                       LLVMBuildFSub(builder, shifted_coords[i],
> -                                                     coords[i], "");
> +                                       LLVMBuildFSub(builder,
> +                                               LLVMBuildFMul(builder, deriv_st[i], invma, ""),
> +                                               LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
>                 }
>
>                 memcpy(derivs_arg, derivs, sizeof(derivs));
>         }
>
> +       /* Shift the texture coordinate. This must be applied after the
> +        * derivative calculation.
> +        */
> +       for (int i = 0; i < 2; ++i)
> +               coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(type, 0.5), "");

Should this have been 1.5?

Marek