[Mesa-dev] [PATCH 3/3] gallivm: kill old per-quad face selection code

Thu Oct 3 12:40:44 PDT 2013

On 10/03/2013 09:42 AM, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> Not used since ages, and it wouldn't work at all with explicit derivatives now
> (not that it did before as it ignored them but now the code would just use
> the derivs pre-projected which would be quite random numbers).
> ---
>   src/gallium/auxiliary/gallivm/lp_bld_sample.c |  751 +++++++++++--------------
>   1 file changed, 313 insertions(+), 438 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index ce05522..3fac981 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -1493,323 +1493,135 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
>                        struct lp_derivatives *derivs_out, /* optional */
>                        boolean need_derivs)
>   {
> +   /*
> +    * Do per-pixel face selection. We cannot however (as we used to do)
> +    * simply calculate the derivs afterwards (which is very bogus for
> +    * explicit derivs btw) because the values would be "random" when
> +    * not all pixels lie on the same face. So what we do here is just
> +    * calculate the derivatives after scaling the coords by the absolute
> +    * value of the inverse major axis, and essentially do rho calculation
> +    * steps as if it were a 3d texture. This is perfect if all pixels hit
> +    * the same face, but not so great at edges, I believe the max error
> +    * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially measuring
> +    * the 3d distance between 2 points on the cube instead of measuring up/down
> +    * the edge). Still this is possibly a win over just selecting the same face
> +    * for all pixels. Unfortunately, something like that doesn't work for
> +    * explicit derivatives.
> +    */
>      struct lp_build_context *coord_bld = &bld->coord_bld;
>      LLVMBuilderRef builder = bld->gallivm->builder;
>      struct gallivm_state *gallivm = bld->gallivm;
>      LLVMValueRef si, ti, ri;
> +   struct lp_build_context *cint_bld = &bld->int_coord_bld;
> +   struct lp_type intctype = cint_bld->type;
> +   LLVMValueRef signs, signt, signr, signma;
> +   LLVMValueRef as, at, ar, face, face_s, face_t;
> +   LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
> +   LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
> +   LLVMValueRef tnegi, rnegi;
> +   LLVMValueRef ma, mai, imahalfpos;
> +   LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
> +   LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
> +                                                  1 << (intctype.width - 1));
> +   LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
> +                                                   intctype.width -1);
> +   LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
> +   LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
> +   LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
> +   LLVMValueRef s = coords[0];
> +   LLVMValueRef t = coords[1];
> +   LLVMValueRef r = coords[2];
> +
> +   assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
> +   assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
> +   assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
>
> -   if (1 || coord_bld->type.length > 4) {
> -      /*
> -       * Do per-pixel face selection. We cannot however (as we used to do)
> -       * simply calculate the derivs afterwards (which is very bogus for
> -       * explicit derivs btw) because the values would be "random" when
> -       * not all pixels lie on the same face. So what we do here is just
> -       * calculate the derivatives after scaling the coords by the absolute
> -       * value of the inverse major axis, and essentially do rho calculation
> -       * steps as if it were a 3d texture. This is perfect if all pixels hit
> -       * the same face, but not so great at edges, I believe the max error
> -       * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially measuring
> -       * the 3d distance between 2 points on the cube instead of measuring up/down
> -       * the edge). Still this is possibly a win over just selecting the same face
> -       * for all pixels. Unfortunately, something like that doesn't work for
> -       * explicit derivatives.
> -       */
> -      struct lp_build_context *cint_bld = &bld->int_coord_bld;
> -      struct lp_type intctype = cint_bld->type;
> -      LLVMValueRef signs, signt, signr, signma;
> -      LLVMValueRef as, at, ar, face, face_s, face_t;
> -      LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
> -      LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
> -      LLVMValueRef tnegi, rnegi;
> -      LLVMValueRef ma, mai, imahalfpos;
> -      LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
> -      LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
> -                                                     1 << (intctype.width - 1));
> -      LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
> -                                                      intctype.width -1);
> -      LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
> -      LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
> -      LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
> -      LLVMValueRef s = coords[0];
> -      LLVMValueRef t = coords[1];
> -      LLVMValueRef r = coords[2];
> -
> -      assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
> -      assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
> -      assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
> +   /*
> +    * get absolute value (for x/y/z face selection) and sign bit
> +    * (for mirroring minor coords and pos/neg face selection)
> +    * of the original coords.
> +    */
> +   as = lp_build_abs(&bld->coord_bld, s);
> +   at = lp_build_abs(&bld->coord_bld, t);
> +   ar = lp_build_abs(&bld->coord_bld, r);
> +
> +   /*
> +    * major face determination: select x if x > y else select y
> +    * select z if z >= max(x,y) else select previous result
> +    * if some axis are the same we chose z over y, y over x - the
> +    * dx10 spec seems to ask for it while OpenGL doesn't care (if we
> +    * wouldn't care could save a select or two if using different
> +    * compares and doing at_g_as_ar last since tnewx and tnewz are the
> +    * same).
> +    */
> +   as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
> +   maxasat = lp_build_max(coord_bld, as, at);
> +   ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
>
> +   if (need_derivs && (derivs_in ||
> +       ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> +        (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
>         /*
> -       * get absolute value (for x/y/z face selection) and sign bit
> -       * (for mirroring minor coords and pos/neg face selection)
> -       * of the original coords.
> +       * XXX: This is really really complex.
> +       * It is a bit overkill to use this for implicit derivatives as well,
> +       * no way this is worth the cost in practice, but seems to be the
> +       * only way for getting accurate and per-pixel lod values.
>          */
> -      as = lp_build_abs(&bld->coord_bld, s);
> -      at = lp_build_abs(&bld->coord_bld, t);
> -      ar = lp_build_abs(&bld->coord_bld, r);
> -
> +      LLVMValueRef imapos, tmp, ddx[3], ddy[3];
> +      LLVMValueRef madx, mady, madxdivma, madydivma;
> +      LLVMValueRef sdxi, tdxi, rdxi, signsdx, signtdx, signrdx;
> +      LLVMValueRef sdyi, tdyi, rdyi, signsdy, signtdy, signrdy;
> +      LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
> +      LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
> +      LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
> +      LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
> +      LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm,
> +                                                coord_bld->type, 0.5);

It looks like we already declared posHalf at the top of the function.

[...]

This is pretty complicated stuff but the code looks good otherwise, AFAICT.

For 1-3, Reviewed-by: Brian Paul <brianp at vmware.com>