[Mesa-dev] [PATCH 3/3] gallivm: kill old per-quad face selection code
Jose Fonseca
jfonseca at vmware.com
Wed Oct 9 14:17:14 PDT 2013
Updated series looks good to me.
Jose
----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
>
> Not used since ages, and it wouldn't work at all with explicit derivatives
> now
> (not that it did before as it ignored them but now the code would just use
> the derivs pre-projected which would be quite random numbers).
>
> v2: also get rid of 3 helper functions no longer used.
> ---
> src/gallium/auxiliary/gallivm/lp_bld_sample.c | 761
> ++++++++++---------------
> 1 file changed, 286 insertions(+), 475 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index 39c3a2f..1c35200 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -1414,72 +1414,6 @@ lp_build_cube_imapos(struct lp_build_context
> *coord_bld, LLVMValueRef coord)
> return ima;
> }
>
> -/** Helper used by lp_build_cube_lookup() */
> -static LLVMValueRef
> -lp_build_cube_imaneg(struct lp_build_context *coord_bld, LLVMValueRef coord)
> -{
> - /* ima = -0.5 / abs(coord); */
> - LLVMValueRef negHalf = lp_build_const_vec(coord_bld->gallivm,
> coord_bld->type, -0.5);
> - LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
> - LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
> - return ima;
> -}
> -
> -/**
> - * Helper used by lp_build_cube_lookup()
> - * FIXME: the sign here can also be 0.
> - * Arithmetically this could definitely make a difference. Either
> - * fix the comment or use other (simpler) sign function, not sure
> - * which one it should be.
> - * \param sign scalar +1 or -1
> - * \param coord float vector
> - * \param ima float vector
> - */
> -static LLVMValueRef
> -lp_build_cube_coord(struct lp_build_context *coord_bld,
> - LLVMValueRef sign, int negate_coord,
> - LLVMValueRef coord, LLVMValueRef ima)
> -{
> - /* return negate(coord) * ima * sign + 0.5; */
> - LLVMValueRef half = lp_build_const_vec(coord_bld->gallivm,
> coord_bld->type, 0.5);
> - LLVMValueRef res;
> -
> - assert(negate_coord == +1 || negate_coord == -1);
> -
> - if (negate_coord == -1) {
> - coord = lp_build_negate(coord_bld, coord);
> - }
> -
> - res = lp_build_mul(coord_bld, coord, ima);
> - if (sign) {
> - sign = lp_build_broadcast_scalar(coord_bld, sign);
> - res = lp_build_mul(coord_bld, res, sign);
> - }
> - res = lp_build_add(coord_bld, res, half);
> -
> - return res;
> -}
> -
> -
> -/** Helper used by lp_build_cube_lookup()
> - * Return (major_coord >= 0) ? pos_face : neg_face;
> - */
> -static LLVMValueRef
> -lp_build_cube_face(struct lp_build_sample_context *bld,
> - LLVMValueRef major_coord,
> - unsigned pos_face, unsigned neg_face)
> -{
> - struct gallivm_state *gallivm = bld->gallivm;
> - LLVMBuilderRef builder = gallivm->builder;
> - LLVMValueRef cmp = LLVMBuildFCmp(builder, LLVMRealUGE,
> - major_coord,
> - bld->float_bld.zero, "");
> - LLVMValueRef pos = lp_build_const_int32(gallivm, pos_face);
> - LLVMValueRef neg = lp_build_const_int32(gallivm, neg_face);
> - LLVMValueRef res = LLVMBuildSelect(builder, cmp, pos, neg, "");
> - return res;
> -}
> -
>
> /** Helper for doing 3-wise selection.
> * Returns sel1 ? val2 : (sel0 ? val0 : val1).
> @@ -1497,6 +1431,7 @@ lp_build_select3(struct lp_build_context *sel_bld,
> return lp_build_select(sel_bld, sel1, val2, tmp);
> }
>
> +
> /**
> * Generate code to do cube face selection and compute per-face texcoords.
> */
> @@ -1513,301 +1448,141 @@ lp_build_cube_lookup(struct lp_build_sample_context
> *bld,
> struct gallivm_state *gallivm = bld->gallivm;
> LLVMValueRef si, ti, ri;
>
> - if (1 || coord_bld->type.length > 4) {
> - /*
> - * Do per-pixel face selection. We cannot however (as we used to do)
> - * simply calculate the derivs afterwards (which is very bogus for
> - * explicit derivs btw) because the values would be "random" when
> - * not all pixels lie on the same face. So what we do here is just
> - * calculate the derivatives after scaling the coords by the absolute
> - * value of the inverse major axis, and essentially do rho calculation
> - * steps as if it were a 3d texture. This is perfect if all pixels hit
> - * the same face, but not so great at edges, I believe the max error
> - * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially
> measuring
> - * the 3d distance between 2 points on the cube instead of measuring
> up/down
> - * the edge). Still this is possibly a win over just selecting the
> same face
> - * for all pixels. Unfortunately, something like that doesn't work for
> - * explicit derivatives.
> - */
> - struct lp_build_context *cint_bld = &bld->int_coord_bld;
> - struct lp_type intctype = cint_bld->type;
> - LLVMTypeRef coord_vec_type = coord_bld->vec_type;
> - LLVMTypeRef cint_vec_type = cint_bld->vec_type;
> - LLVMValueRef as, at, ar, face, face_s, face_t;
> - LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
> - LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
> - LLVMValueRef tnegi, rnegi;
> - LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
> - LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type,
> 0.5);
> - LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
> - 1 << (intctype.width -
> 1));
> - LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
> - intctype.width -1);
> - LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype,
> PIPE_TEX_FACE_POS_X);
> - LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype,
> PIPE_TEX_FACE_POS_Y);
> - LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype,
> PIPE_TEX_FACE_POS_Z);
> - LLVMValueRef s = coords[0];
> - LLVMValueRef t = coords[1];
> - LLVMValueRef r = coords[2];
> -
> - assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
> - assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
> - assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
> + /*
> + * Do per-pixel face selection. We cannot however (as we used to do)
> + * simply calculate the derivs afterwards (which is very bogus for
> + * explicit derivs btw) because the values would be "random" when
> + * not all pixels lie on the same face. So what we do here is just
> + * calculate the derivatives after scaling the coords by the absolute
> + * value of the inverse major axis, and essentially do rho calculation
> + * steps as if it were a 3d texture. This is perfect if all pixels hit
> + * the same face, but not so great at edges, I believe the max error
> + * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially
> measuring
> + * the 3d distance between 2 points on the cube instead of measuring
> up/down
> + * the edge). Still this is possibly a win over just selecting the same
> face
> + * for all pixels. Unfortunately, something like that doesn't work for
> + * explicit derivatives.
> + */
> + struct lp_build_context *cint_bld = &bld->int_coord_bld;
> + struct lp_type intctype = cint_bld->type;
> + LLVMTypeRef coord_vec_type = coord_bld->vec_type;
> + LLVMTypeRef cint_vec_type = cint_bld->vec_type;
> + LLVMValueRef as, at, ar, face, face_s, face_t;
> + LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
> + LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
> + LLVMValueRef tnegi, rnegi;
> + LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
> + LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
> + LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
> + 1 << (intctype.width -
> 1));
> + LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
> + intctype.width -1);
> + LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype,
> PIPE_TEX_FACE_POS_X);
> + LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype,
> PIPE_TEX_FACE_POS_Y);
> + LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype,
> PIPE_TEX_FACE_POS_Z);
> + LLVMValueRef s = coords[0];
> + LLVMValueRef t = coords[1];
> + LLVMValueRef r = coords[2];
> +
> + assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
> + assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
> + assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
> +
> + /*
> + * get absolute value (for x/y/z face selection) and sign bit
> + * (for mirroring minor coords and pos/neg face selection)
> + * of the original coords.
> + */
> + as = lp_build_abs(&bld->coord_bld, s);
> + at = lp_build_abs(&bld->coord_bld, t);
> + ar = lp_build_abs(&bld->coord_bld, r);
>
> + /*
> + * major face determination: select x if x > y else select y
> + * select z if z >= max(x,y) else select previous result
> + * if some axis are the same we chose z over y, y over x - the
> + * dx10 spec seems to ask for it while OpenGL doesn't care (if we
> + * wouldn't care could save a select or two if using different
> + * compares and doing at_g_as_ar last since tnewx and tnewz are the
> + * same).
> + */
> + as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
> + maxasat = lp_build_max(coord_bld, as, at);
> + ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
> +
> + if (need_derivs && (derivs_in ||
> + ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> + (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
> /*
> - * get absolute value (for x/y/z face selection) and sign bit
> - * (for mirroring minor coords and pos/neg face selection)
> - * of the original coords.
> + * XXX: This is really really complex.
> + * It is a bit overkill to use this for implicit derivatives as well,
> + * no way this is worth the cost in practice, but seems to be the
> + * only way for getting accurate and per-pixel lod values.
> */
> - as = lp_build_abs(&bld->coord_bld, s);
> - at = lp_build_abs(&bld->coord_bld, t);
> - ar = lp_build_abs(&bld->coord_bld, r);
> -
> + LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
> + LLVMValueRef madx, mady, madxdivma, madydivma;
> + LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
> + LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
> + LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
> + LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
> + LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
> /*
> - * major face determination: select x if x > y else select y
> - * select z if z >= max(x,y) else select previous result
> - * if some axis are the same we chose z over y, y over x - the
> - * dx10 spec seems to ask for it while OpenGL doesn't care (if we
> - * wouldn't care could save a select or two if using different
> - * compares and doing at_g_as_ar last since tnewx and tnewz are the
> - * same).
> + * s = 1/2 * ( sc / ma + 1)
> + * t = 1/2 * ( tc / ma + 1)
> + *
> + * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
> + * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
> + *
> + * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
> + * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
> + * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
> + * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
> */
> - as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
> - maxasat = lp_build_max(coord_bld, as, at);
> - ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
> -
> - if (need_derivs && (derivs_in ||
> - ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> - (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
> - /*
> - * XXX: This is really really complex.
> - * It is a bit overkill to use this for implicit derivatives as
> well,
> - * no way this is worth the cost in practice, but seems to be the
> - * only way for getting accurate and per-pixel lod values.
> - */
> - LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
> - LLVMValueRef madx, mady, madxdivma, madydivma;
> - LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
> - LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
> - LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
> - LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
> - LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
> - /*
> - * s = 1/2 * ( sc / ma + 1)
> - * t = 1/2 * ( tc / ma + 1)
> - *
> - * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
> - * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
> - *
> - * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
> - * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
> - * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
> - * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
> - */
> -
> - /* select ma, calculate ima */
> - ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> - mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
> - signmabit = LLVMBuildAnd(builder, mai, signmask, "");
> - ima = lp_build_div(coord_bld, coord_bld->one, ma);
> - imahalf = lp_build_mul(coord_bld, posHalf, ima);
> - imahalfpos = lp_build_abs(coord_bld, imahalf);
> -
> - if (!derivs_in) {
> - ddx[0] = lp_build_ddx(coord_bld, s);
> - ddx[1] = lp_build_ddx(coord_bld, t);
> - ddx[2] = lp_build_ddx(coord_bld, r);
> - ddy[0] = lp_build_ddy(coord_bld, s);
> - ddy[1] = lp_build_ddy(coord_bld, t);
> - ddy[2] = lp_build_ddy(coord_bld, r);
> - }
> - else {
> - ddx[0] = derivs_in->ddx[0];
> - ddx[1] = derivs_in->ddx[1];
> - ddx[2] = derivs_in->ddx[2];
> - ddy[0] = derivs_in->ddy[0];
> - ddy[1] = derivs_in->ddy[1];
> - ddy[2] = derivs_in->ddy[2];
> - }
> -
> - /* select major derivatives */
> - madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0],
> ddx[1], ddx[2]);
> - mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0],
> ddy[1], ddy[2]);
> -
> - si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
> - ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
> - ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
> -
> - sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
> - tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
> - rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
> -
> - sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
> - tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
> - rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
>
> - /*
> - * compute all possible new s/t coords, which does the mirroring,
> - * and do the same for derivs minor axes.
> - * snewx = signma * -r;
> - * tnewx = -t;
> - * snewy = s;
> - * tnewy = signma * r;
> - * snewz = signma * s;
> - * tnewz = -t;
> - */
> - tnegi = LLVMBuildXor(builder, ti, signmask, "");
> - rnegi = LLVMBuildXor(builder, ri, signmask, "");
> - tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
> - rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
> - tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
> - rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
> -
> - snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
> - tnewx = tnegi;
> - sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
> - tdxnewx = tdxnegi;
> - sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
> - tdynewx = tdynegi;
> -
> - snewy = si;
> - tnewy = LLVMBuildXor(builder, signmabit, ri, "");
> - sdxnewy = sdxi;
> - tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
> - sdynewy = sdyi;
> - tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
> -
> - snewz = LLVMBuildXor(builder, signmabit, si, "");
> - tnewz = tnegi;
> - sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
> - tdxnewz = tdxnegi;
> - sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
> - tdynewz = tdynegi;
> -
> - /* select the mirrored values */
> - face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex,
> facey, facez);
> - face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx,
> snewy, snewz);
> - face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx,
> tnewy, tnewz);
> - face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at,
> sdxnewx, sdxnewy, sdxnewz);
> - face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at,
> tdxnewx, tdxnewy, tdxnewz);
> - face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at,
> sdynewx, sdynewy, sdynewz);
> - face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at,
> tdynewx, tdynewy, tdynewz);
> -
> - face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
> - face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
> - face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
> - face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
> - face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
> - face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
> -
> - /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
> - madxdivma = lp_build_mul(coord_bld, madx, ima);
> - tmp = lp_build_mul(coord_bld, madxdivma, face_s);
> - tmp = lp_build_sub(coord_bld, face_sdx, tmp);
> - derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
> -
> - /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
> - tmp = lp_build_mul(coord_bld, madxdivma, face_t);
> - tmp = lp_build_sub(coord_bld, face_tdx, tmp);
> - derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
> -
> - /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
> - madydivma = lp_build_mul(coord_bld, mady, ima);
> - tmp = lp_build_mul(coord_bld, madydivma, face_s);
> - tmp = lp_build_sub(coord_bld, face_sdy, tmp);
> - derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
> -
> - /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
> - tmp = lp_build_mul(coord_bld, madydivma, face_t);
> - tmp = lp_build_sub(coord_bld, face_tdy, tmp);
> - derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
> -
> - signma = LLVMBuildLShr(builder, mai, signshift, "");
> - coords[2] = LLVMBuildOr(builder, face, signma, "face");
> -
> - /* project coords */
> - face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
> - face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
> -
> - coords[0] = lp_build_add(coord_bld, face_s, posHalf);
> - coords[1] = lp_build_add(coord_bld, face_t, posHalf);
> -
> - return;
> + /* select ma, calculate ima */
> + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> + mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
> + signmabit = LLVMBuildAnd(builder, mai, signmask, "");
> + ima = lp_build_div(coord_bld, coord_bld->one, ma);
> + imahalf = lp_build_mul(coord_bld, posHalf, ima);
> + imahalfpos = lp_build_abs(coord_bld, imahalf);
> +
> + if (!derivs_in) {
> + ddx[0] = lp_build_ddx(coord_bld, s);
> + ddx[1] = lp_build_ddx(coord_bld, t);
> + ddx[2] = lp_build_ddx(coord_bld, r);
> + ddy[0] = lp_build_ddy(coord_bld, s);
> + ddy[1] = lp_build_ddy(coord_bld, t);
> + ddy[2] = lp_build_ddy(coord_bld, r);
> }
> -
> - else if (need_derivs) {
> - LLVMValueRef ddx_ddy[2], tmp[3], rho_vec;
> - static const unsigned char swizzle0[] = { /* no-op swizzle */
> - 0, LP_BLD_SWIZZLE_DONTCARE,
> - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> - };
> - static const unsigned char swizzle1[] = {
> - 1, LP_BLD_SWIZZLE_DONTCARE,
> - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> - };
> - static const unsigned char swizzle01[] = { /* no-op swizzle */
> - 0, 1,
> - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> - };
> - static const unsigned char swizzle23[] = {
> - 2, 3,
> - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> - };
> - static const unsigned char swizzle02[] = {
> - 0, 2,
> - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> - };
> -
> - /*
> - * scale the s/t/r coords pre-select/mirror so we can calculate
> - * "reasonable" derivs.
> - */
> - ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> - imahalfpos = lp_build_cube_imapos(coord_bld, ma);
> - s = lp_build_mul(coord_bld, s, imahalfpos);
> - t = lp_build_mul(coord_bld, t, imahalfpos);
> - r = lp_build_mul(coord_bld, r, imahalfpos);
> -
> - /*
> - * This isn't quite the same as the "ordinary" (3d deriv) path
> since we
> - * know the texture is square which simplifies things (we can omit
> the
> - * size mul which happens very early completely here and do it at
> the
> - * very end).
> - * Also always do calculations according to
> GALLIVM_DEBUG_NO_RHO_APPROX
> - * since the error can get quite big otherwise at edges.
> - * (With no_rho_approx max error is sqrt(2) at edges, same as it is
> - * without no_rho_approx for 2d textures, otherwise it would be
> factor 2.)
> - */
> - ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
> - ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
> -
> - ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
> - ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
> -
> - tmp[0] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
> - tmp[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
> - tmp[2] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
> -
> - rho_vec = lp_build_add(coord_bld, tmp[0], tmp[1]);
> - rho_vec = lp_build_add(coord_bld, rho_vec, tmp[2]);
> -
> - tmp[0] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
> - tmp[1] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
> - *rho = lp_build_max(coord_bld, tmp[0], tmp[1]);
> + else {
> + ddx[0] = derivs_in->ddx[0];
> + ddx[1] = derivs_in->ddx[1];
> + ddx[2] = derivs_in->ddx[2];
> + ddy[0] = derivs_in->ddy[0];
> + ddy[1] = derivs_in->ddy[1];
> + ddy[2] = derivs_in->ddy[2];
> }
>
> - if (!need_derivs) {
> - ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> - }
> - mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
> - signmabit = LLVMBuildAnd(builder, mai, signmask, "");
> + /* select major derivatives */
> + madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0],
> ddx[1], ddx[2]);
> + mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0],
> ddy[1], ddy[2]);
>
> si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
> ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
> ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
>
> + sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
> + tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
> + rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
> +
> + sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
> + tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
> + rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
> +
> /*
> - * compute all possible new s/t coords, which does the mirroring
> + * compute all possible new s/t coords, which does the mirroring,
> + * and do the same for derivs minor axes.
> * snewx = signma * -r;
> * tnewx = -t;
> * snewy = s;
> @@ -1817,164 +1592,200 @@ lp_build_cube_lookup(struct lp_build_sample_context
> *bld,
> */
> tnegi = LLVMBuildXor(builder, ti, signmask, "");
> rnegi = LLVMBuildXor(builder, ri, signmask, "");
> + tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
> + rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
> + tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
> + rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
>
> snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
> tnewx = tnegi;
> + sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
> + tdxnewx = tdxnegi;
> + sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
> + tdynewx = tdynegi;
>
> snewy = si;
> tnewy = LLVMBuildXor(builder, signmabit, ri, "");
> + sdxnewy = sdxi;
> + tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
> + sdynewy = sdyi;
> + tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
>
> snewz = LLVMBuildXor(builder, signmabit, si, "");
> tnewz = tnegi;
> + sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
> + tdxnewz = tdxnegi;
> + sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
> + tdynewz = tdynegi;
>
> /* select the mirrored values */
> + face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey,
> facez);
> face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx,
> snewy, snewz);
> face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx,
> tnewy, tnewz);
> - face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey,
> facez);
> + face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx,
> sdxnewy, sdxnewz);
> + face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx,
> tdxnewy, tdxnewz);
> + face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx,
> sdynewy, sdynewz);
> + face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx,
> tdynewy, tdynewz);
>
> face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
> face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
> + face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
> + face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
> + face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
> + face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
> +
> + /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
> + madxdivma = lp_build_mul(coord_bld, madx, ima);
> + tmp = lp_build_mul(coord_bld, madxdivma, face_s);
> + tmp = lp_build_sub(coord_bld, face_sdx, tmp);
> + derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
> +
> + /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
> + tmp = lp_build_mul(coord_bld, madxdivma, face_t);
> + tmp = lp_build_sub(coord_bld, face_tdx, tmp);
> + derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
> +
> + /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
> + madydivma = lp_build_mul(coord_bld, mady, ima);
> + tmp = lp_build_mul(coord_bld, madydivma, face_s);
> + tmp = lp_build_sub(coord_bld, face_sdy, tmp);
> + derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
> +
> + /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
> + tmp = lp_build_mul(coord_bld, madydivma, face_t);
> + tmp = lp_build_sub(coord_bld, face_tdy, tmp);
> + derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
>
> - /* add +1 for neg face */
> - /* XXX with AVX probably want to use another select here -
> - * as long as we ensure vblendvps gets used we can actually
> - * skip the comparison and just use sign as a "mask" directly.
> - */
> signma = LLVMBuildLShr(builder, mai, signshift, "");
> coords[2] = LLVMBuildOr(builder, face, signma, "face");
>
> /* project coords */
> - if (!need_derivs) {
> - imahalfpos = lp_build_cube_imapos(coord_bld, ma);
> - face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
> - face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
> - }
> + face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
> + face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
>
> coords[0] = lp_build_add(coord_bld, face_s, posHalf);
> coords[1] = lp_build_add(coord_bld, face_t, posHalf);
> +
> + return;
> }
>
> - else {
> - struct lp_build_if_state if_ctx;
> - LLVMValueRef face_s_var;
> - LLVMValueRef face_t_var;
> - LLVMValueRef face_var;
> - LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
> - LLVMValueRef shuffles[4];
> - LLVMValueRef arxy_ge_aryx, arxy_ge_arzz, arxy_ge_arxy_arzz;
> - LLVMValueRef arxyxy, aryxzz, arxyxy_ge_aryxzz;
> - LLVMValueRef tmp[4], rxyz, arxyz;
> - struct lp_build_context *float_bld = &bld->float_bld;
> - LLVMValueRef s, t, r, face, face_s, face_t;
> -
> - assert(bld->coord_bld.type.length == 4);
> -
> - tmp[0] = s = coords[0];
> - tmp[1] = t = coords[1];
> - tmp[2] = r = coords[2];
> - rxyz = lp_build_hadd_partial4(&bld->coord_bld, tmp, 3);
> - arxyz = lp_build_abs(&bld->coord_bld, rxyz);
> -
> - shuffles[0] = lp_build_const_int32(gallivm, 0);
> - shuffles[1] = lp_build_const_int32(gallivm, 1);
> - shuffles[2] = lp_build_const_int32(gallivm, 0);
> - shuffles[3] = lp_build_const_int32(gallivm, 1);
> - arxyxy = LLVMBuildShuffleVector(builder, arxyz, arxyz,
> LLVMConstVector(shuffles, 4), "");
> - shuffles[0] = lp_build_const_int32(gallivm, 1);
> - shuffles[1] = lp_build_const_int32(gallivm, 0);
> - shuffles[2] = lp_build_const_int32(gallivm, 2);
> - shuffles[3] = lp_build_const_int32(gallivm, 2);
> - aryxzz = LLVMBuildShuffleVector(builder, arxyz, arxyz,
> LLVMConstVector(shuffles, 4), "");
> - arxyxy_ge_aryxzz = lp_build_cmp(&bld->coord_bld, PIPE_FUNC_GEQUAL,
> arxyxy, aryxzz);
> -
> - shuffles[0] = lp_build_const_int32(gallivm, 0);
> - shuffles[1] = lp_build_const_int32(gallivm, 1);
> - arxy_ge_aryx = LLVMBuildShuffleVector(builder, arxyxy_ge_aryxzz,
> arxyxy_ge_aryxzz,
> - LLVMConstVector(shuffles, 2),
> "");
> - shuffles[0] = lp_build_const_int32(gallivm, 2);
> - shuffles[1] = lp_build_const_int32(gallivm, 3);
> - arxy_ge_arzz = LLVMBuildShuffleVector(builder, arxyxy_ge_aryxzz,
> arxyxy_ge_aryxzz,
> - LLVMConstVector(shuffles, 2),
> "");
> - arxy_ge_arxy_arzz = LLVMBuildAnd(builder, arxy_ge_aryx, arxy_ge_arzz,
> "");
> -
> - arx_ge_ary_arz = LLVMBuildExtractElement(builder, arxy_ge_arxy_arzz,
> - lp_build_const_int32(gallivm,
> 0), "");
> - arx_ge_ary_arz = LLVMBuildICmp(builder, LLVMIntNE, arx_ge_ary_arz,
> - lp_build_const_int32(gallivm,
> 0), "");
> - ary_ge_arx_arz = LLVMBuildExtractElement(builder, arxy_ge_arxy_arzz,
> - lp_build_const_int32(gallivm,
> 1), "");
> - ary_ge_arx_arz = LLVMBuildICmp(builder, LLVMIntNE, ary_ge_arx_arz,
> - lp_build_const_int32(gallivm,
> 0), "");
> - face_s_var = lp_build_alloca(gallivm, bld->coord_bld.vec_type,
> "face_s_var");
> - face_t_var = lp_build_alloca(gallivm, bld->coord_bld.vec_type,
> "face_t_var");
> - face_var = lp_build_alloca(gallivm, bld->int_bld.vec_type,
> "face_var");
> -
> - lp_build_if(&if_ctx, gallivm, arx_ge_ary_arz);
> - {
> - /* +/- X face */
> - LLVMValueRef sign, ima;
> - si = LLVMBuildExtractElement(builder, rxyz,
> - lp_build_const_int32(gallivm, 0), "");
> - /* +/- X face */
> - sign = lp_build_sgn(float_bld, si);
> - ima = lp_build_cube_imaneg(coord_bld, s);
> - face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
> - face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
> - face = lp_build_cube_face(bld, si,
> - PIPE_TEX_FACE_POS_X,
> - PIPE_TEX_FACE_NEG_X);
> - LLVMBuildStore(builder, face_s, face_s_var);
> - LLVMBuildStore(builder, face_t, face_t_var);
> - LLVMBuildStore(builder, face, face_var);
> - }
> - lp_build_else(&if_ctx);
> - {
> - struct lp_build_if_state if_ctx2;
> -
> - lp_build_if(&if_ctx2, gallivm, ary_ge_arx_arz);
> - {
> - LLVMValueRef sign, ima;
> - /* +/- Y face */
> - ti = LLVMBuildExtractElement(builder, rxyz,
> - lp_build_const_int32(gallivm, 1),
> "");
> - sign = lp_build_sgn(float_bld, ti);
> - ima = lp_build_cube_imaneg(coord_bld, t);
> - face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
> - face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
> - face = lp_build_cube_face(bld, ti,
> - PIPE_TEX_FACE_POS_Y,
> - PIPE_TEX_FACE_NEG_Y);
> - LLVMBuildStore(builder, face_s, face_s_var);
> - LLVMBuildStore(builder, face_t, face_t_var);
> - LLVMBuildStore(builder, face, face_var);
> - }
> - lp_build_else(&if_ctx2);
> - {
> - /* +/- Z face */
> - LLVMValueRef sign, ima;
> - ri = LLVMBuildExtractElement(builder, rxyz,
> - lp_build_const_int32(gallivm, 2),
> "");
> - sign = lp_build_sgn(float_bld, ri);
> - ima = lp_build_cube_imaneg(coord_bld, r);
> - face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
> - face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
> - face = lp_build_cube_face(bld, ri,
> - PIPE_TEX_FACE_POS_Z,
> - PIPE_TEX_FACE_NEG_Z);
> - LLVMBuildStore(builder, face_s, face_s_var);
> - LLVMBuildStore(builder, face_t, face_t_var);
> - LLVMBuildStore(builder, face, face_var);
> - }
> - lp_build_endif(&if_ctx2);
> - }
> + else if (need_derivs) {
> + LLVMValueRef ddx_ddy[2], tmp[3], rho_vec;
> + static const unsigned char swizzle0[] = { /* no-op swizzle */
> + 0, LP_BLD_SWIZZLE_DONTCARE,
> + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> + };
> + static const unsigned char swizzle1[] = {
> + 1, LP_BLD_SWIZZLE_DONTCARE,
> + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> + };
> + static const unsigned char swizzle01[] = { /* no-op swizzle */
> + 0, 1,
> + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> + };
> + static const unsigned char swizzle23[] = {
> + 2, 3,
> + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> + };
> + static const unsigned char swizzle02[] = {
> + 0, 2,
> + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> + };
> +
> + /*
> + * scale the s/t/r coords pre-select/mirror so we can calculate
> + * "reasonable" derivs.
> + */
> + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> + imahalfpos = lp_build_cube_imapos(coord_bld, ma);
> + s = lp_build_mul(coord_bld, s, imahalfpos);
> + t = lp_build_mul(coord_bld, t, imahalfpos);
> + r = lp_build_mul(coord_bld, r, imahalfpos);
> +
> + /*
> + * This isn't quite the same as the "ordinary" (3d deriv) path since
> we
> + * know the texture is square which simplifies things (we can omit the
> + * size mul which happens very early completely here and do it at the
> + * very end).
> + * Also always do calculations according to
> GALLIVM_DEBUG_NO_RHO_APPROX
> + * since the error can get quite big otherwise at edges.
> + * (With no_rho_approx max error is sqrt(2) at edges, same as it is
> + * without no_rho_approx for 2d textures, otherwise it would be factor
> 2.)
> + */
> + ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
> + ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
>
> - lp_build_endif(&if_ctx);
> + ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
> + ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
>
> - coords[0] = LLVMBuildLoad(builder, face_s_var, "face_s");
> - coords[1] = LLVMBuildLoad(builder, face_t_var, "face_t");
> - face = LLVMBuildLoad(builder, face_var, "face");
> - coords[2] = lp_build_broadcast_scalar(&bld->int_coord_bld, face);
> + tmp[0] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
> + tmp[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
> + tmp[2] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
> +
> + rho_vec = lp_build_add(coord_bld, tmp[0], tmp[1]);
> + rho_vec = lp_build_add(coord_bld, rho_vec, tmp[2]);
> +
> + tmp[0] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
> + tmp[1] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
> + *rho = lp_build_max(coord_bld, tmp[0], tmp[1]);
> }
> +
> + if (!need_derivs) {
> + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> + }
> + mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
> + signmabit = LLVMBuildAnd(builder, mai, signmask, "");
> +
> + si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
> + ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
> + ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
> +
> + /*
> + * compute all possible new s/t coords, which does the mirroring
> + * snewx = signma * -r;
> + * tnewx = -t;
> + * snewy = s;
> + * tnewy = signma * r;
> + * snewz = signma * s;
> + * tnewz = -t;
> + */
> + tnegi = LLVMBuildXor(builder, ti, signmask, "");
> + rnegi = LLVMBuildXor(builder, ri, signmask, "");
> +
> + snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
> + tnewx = tnegi;
> +
> + snewy = si;
> + tnewy = LLVMBuildXor(builder, signmabit, ri, "");
> +
> + snewz = LLVMBuildXor(builder, signmabit, si, "");
> + tnewz = tnegi;
> +
> + /* select the mirrored values */
> + face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy,
> snewz);
> + face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy,
> tnewz);
> + face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey,
> facez);
> +
> + face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
> + face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
> +
> + /* add +1 for neg face */
> + /* XXX with AVX probably want to use another select here -
> + * as long as we ensure vblendvps gets used we can actually
> + * skip the comparison and just use sign as a "mask" directly.
> + */
> + signma = LLVMBuildLShr(builder, mai, signshift, "");
> + coords[2] = LLVMBuildOr(builder, face, signma, "face");
> +
> + /* project coords */
> + if (!need_derivs) {
> + imahalfpos = lp_build_cube_imapos(coord_bld, ma);
> + face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
> + face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
> + }
> +
> + coords[0] = lp_build_add(coord_bld, face_s, posHalf);
> + coords[1] = lp_build_add(coord_bld, face_t, posHalf);
> }
>
>
> --
> 1.7.9.5
>
More information about the mesa-dev
mailing list