[Mesa-dev] [PATCH 2/3] gallivm: handle explicit derivatives for cubemaps
Brian Paul
brianp at vmware.com
Thu Oct 3 12:39:13 PDT 2013
On 10/03/2013 09:42 AM, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> They need some special handling. Quite complicated.
> Additionally, use the same code for implicit derivatives too if no_rho_approx
> and no_quad_lod is set, because it seems while generally it should be ok
> to use per quad lod for implicit derivatives there's at least some test which
> insists that in case of cubemaps the shared lod value MUST come from a pixel
> inside the primitive (due to the derivatives becoming different if a different
> larger major axis is chosen).
> ---
> src/gallium/auxiliary/gallivm/lp_bld_sample.c | 221 +++++++++++++++++++--
> src/gallium/auxiliary/gallivm/lp_bld_sample.h | 3 +-
> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 35 +++-
> 3 files changed, 231 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index ea6bec7..ce05522 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -273,7 +273,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
> rho = lp_build_mul(rho_bld, cubesize, rho);
> }
> - else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
> + else if (derivs) {
> LLVMValueRef ddmax[3], ddx[3], ddy[3];
> for (i = 0; i < dims; i++) {
> LLVMValueRef floatdim;
> @@ -1488,8 +1488,9 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
> void
> lp_build_cube_lookup(struct lp_build_sample_context *bld,
> LLVMValueRef *coords,
> - const struct lp_derivatives *derivs, /* optional */
> + const struct lp_derivatives *derivs_in, /* optional */
> LLVMValueRef *rho,
> + struct lp_derivatives *derivs_out, /* optional */
> boolean need_derivs)
> {
> struct lp_build_context *coord_bld = &bld->coord_bld;
> @@ -1512,8 +1513,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> * the edge). Still this is possibly a win over just selecting the same face
> * for all pixels. Unfortunately, something like that doesn't work for
> * explicit derivatives.
> - * TODO: handle explicit derivatives by transforming them alongside coords
> - * somehow.
> */
> struct lp_build_context *cint_bld = &bld->int_coord_bld;
> struct lp_type intctype = cint_bld->type;
> @@ -1522,7 +1521,7 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
> LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
> LLVMValueRef tnegi, rnegi;
> - LLVMValueRef ma, mai, ima;
> + LLVMValueRef ma, mai, imahalfpos;
> LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
> LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
> 1 << (intctype.width - 1));
> @@ -1561,7 +1560,195 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> maxasat = lp_build_max(coord_bld, as, at);
> ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
>
> - if (need_derivs) {
> + if (need_derivs && (derivs_in ||
> + ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> + (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
> + /*
> + * XXX: This is really really complex.
> + * It is a bit overkill to use this for implicit derivatives as well,
> + * no way this is worth the cost in practice, but seems to be the
> + * only way for getting accurate and per-pixel lod values.
> + */
> + LLVMValueRef imapos, tmp, ddx[3], ddy[3];
> + LLVMValueRef madx, mady, madxdivma, madydivma;
> + LLVMValueRef sdxi, tdxi, rdxi, signsdx, signtdx, signrdx;
> + LLVMValueRef sdyi, tdyi, rdyi, signsdy, signtdy, signrdy;
> + LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
> + LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
> + LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
> + LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
> + LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm,
> + coord_bld->type, 0.5);
> + /*
> + * s = 1/2 * ( sc / ma + 1)
> + * t = 1/2 * ( tc / ma + 1)
> + *
> + * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
> + * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
> + *
> + * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
> + * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
> + * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
> + * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
> + */
> +
> + /* select ma, calculate ima */
> + ma = lp_build_select(coord_bld, as_ge_at, s, t);
> + ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
> + imapos = lp_build_abs(coord_bld, ma);
> + imapos = lp_build_div(coord_bld, coord_bld->one, imapos);
> + imahalfpos = lp_build_mul(coord_bld, posHalf, imapos);
> +
> + if (!derivs_in) {
> + ddx[0] = lp_build_ddx(coord_bld, s);
> + ddx[1] = lp_build_ddx(coord_bld, t);
> + ddx[2] = lp_build_ddx(coord_bld, r);
> + ddy[0] = lp_build_ddy(coord_bld, s);
> + ddy[1] = lp_build_ddy(coord_bld, t);
> + ddy[2] = lp_build_ddy(coord_bld, r);
> + }
> + else {
> + ddx[0] = derivs_in->ddx[0];
> + ddx[1] = derivs_in->ddx[1];
> + ddx[2] = derivs_in->ddx[2];
> + ddy[0] = derivs_in->ddy[0];
> + ddy[1] = derivs_in->ddy[1];
> + ddy[2] = derivs_in->ddy[2];
> + }
> +
> + /* select major derivatives */
> + madx = lp_build_select(coord_bld, as_ge_at, ddx[0], ddx[1]);
> + madx = lp_build_select(coord_bld, ar_ge_as_at, ddx[2], madx);
> + madx = lp_build_abs(coord_bld, madx);
There are quite a few of these double lp_build_select() sequences here and
below.
It might be nice to wrap this up in a utility function along the lines of:
select3(c1, c2, a, b, c)
{
if c1, return a;
else if c2, return b;
else return c;
}
It would only reduce two lines to one, but it might be more readable.
> + mady = lp_build_select(coord_bld, as_ge_at, ddy[0], ddy[1]);
> + mady = lp_build_select(coord_bld, ar_ge_as_at, ddy[2], mady);
> + mady = lp_build_abs(coord_bld, mady);
> +
> + si = LLVMBuildBitCast(builder, s, lp_build_vec_type(gallivm, intctype), "");
> + ti = LLVMBuildBitCast(builder, t, lp_build_vec_type(gallivm, intctype), "");
> + ri = LLVMBuildBitCast(builder, r, lp_build_vec_type(gallivm, intctype), "");
> + signs = LLVMBuildAnd(builder, si, signmask, "");
> + signt = LLVMBuildAnd(builder, ti, signmask, "");
> + signr = LLVMBuildAnd(builder, ri, signmask, "");
> +
> + sdxi = LLVMBuildBitCast(builder, ddx[0], lp_build_vec_type(gallivm, intctype), "");
> + tdxi = LLVMBuildBitCast(builder, ddx[1], lp_build_vec_type(gallivm, intctype), "");
> + rdxi = LLVMBuildBitCast(builder, ddx[2], lp_build_vec_type(gallivm, intctype), "");
> + signsdx = LLVMBuildAnd(builder, sdxi, signmask, "");
> + signtdx = LLVMBuildAnd(builder, tdxi, signmask, "");
> + signrdx = LLVMBuildAnd(builder, rdxi, signmask, "");
> +
> + sdyi = LLVMBuildBitCast(builder, ddy[0], lp_build_vec_type(gallivm, intctype), "");
> + tdyi = LLVMBuildBitCast(builder, ddy[1], lp_build_vec_type(gallivm, intctype), "");
> + rdyi = LLVMBuildBitCast(builder, ddy[2], lp_build_vec_type(gallivm, intctype), "");
> + signsdy = LLVMBuildAnd(builder, sdyi, signmask, "");
> + signtdy = LLVMBuildAnd(builder, tdyi, signmask, "");
> + signrdy = LLVMBuildAnd(builder, rdyi, signmask, "");
Lots of similar lines of code. Would a get_sign(vec) helper be useful?
And maybe put lp_build_vec_type(gallivm, intctype) in a local var?
> + /*
> + * compute all possible new s/t coords, same for derivs
> + * snewx = signs * -r;
> + * tnewx = -t;
> + * snewy = s;
> + * tnewy = signt * r;
> + * snewz = signr * s;
> + * tnewz = -t;
> + */
> + tnegi = LLVMBuildXor(builder, ti, signmask, "");
> + rnegi = LLVMBuildXor(builder, ri, signmask, "");
> + tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
> + rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
> + tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
> + rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
> +
> + snewx = LLVMBuildXor(builder, signs, rnegi, "");
> + tnewx = tnegi;
> + sdxnewx = LLVMBuildXor(builder, signsdx, rdxnegi, "");
> + tdxnewx = tdxnegi;
> + sdynewx = LLVMBuildXor(builder, signsdy, rdynegi, "");
> + tdynewx = tdynegi;
> +
> + snewy = si;
> + tnewy = LLVMBuildXor(builder, signt, ri, "");
> + sdxnewy = sdxi;
> + tdxnewy = LLVMBuildXor(builder, signtdx, rdxi, "");
> + sdynewy = sdyi;
> + tdynewy = LLVMBuildXor(builder, signtdy, rdyi, "");
> +
> + snewz = LLVMBuildXor(builder, signr, si, "");
> + tnewz = tnegi;
> + sdxnewz = LLVMBuildXor(builder, signrdx, sdxi, "");
> + tdxnewz = tdxnegi;
> + sdynewz = LLVMBuildXor(builder, signrdy, sdyi, "");
> + tdynewz = tdynegi;
> +
> + /* select/mirror */
> + face_s = lp_build_select(cint_bld, as_ge_at, snewx, snewy);
> + face_t = lp_build_select(cint_bld, as_ge_at, tnewx, tnewy);
> + face = lp_build_select(cint_bld, as_ge_at, facex, facey);
> + face_sdx = lp_build_select(cint_bld, as_ge_at, sdxnewx, sdxnewy);
> + face_tdx = lp_build_select(cint_bld, as_ge_at, tdxnewx, tdxnewy);
> + face_sdy = lp_build_select(cint_bld, as_ge_at, sdynewx, sdynewy);
> + face_tdy = lp_build_select(cint_bld, as_ge_at, tdynewx, tdynewy);
> +
> + face_s = lp_build_select(cint_bld, ar_ge_as_at, snewz, face_s);
> + face_t = lp_build_select(cint_bld, ar_ge_as_at, tnewz, face_t);
> + face = lp_build_select(cint_bld, ar_ge_as_at, facez, face);
> + face_sdx = lp_build_select(cint_bld, ar_ge_as_at, sdxnewz, face_sdx);
> + face_tdx = lp_build_select(cint_bld, ar_ge_as_at, tdxnewz, face_tdx);
> + face_sdy = lp_build_select(cint_bld, ar_ge_as_at, sdynewz, face_sdy);
> + face_tdy = lp_build_select(cint_bld, ar_ge_as_at, tdynewz, face_tdy);
> +
> + face_s = LLVMBuildBitCast(builder, face_s,
> + lp_build_vec_type(gallivm, coord_bld->type), "");
> + face_t = LLVMBuildBitCast(builder, face_t,
> + lp_build_vec_type(gallivm, coord_bld->type), "");
> + face_sdx = LLVMBuildBitCast(builder, face_sdx,
> + lp_build_vec_type(gallivm, coord_bld->type), "");
> + face_tdx = LLVMBuildBitCast(builder, face_tdx,
> + lp_build_vec_type(gallivm, coord_bld->type), "");
> + face_sdy = LLVMBuildBitCast(builder, face_sdy,
> + lp_build_vec_type(gallivm, coord_bld->type), "");
> + face_tdy = LLVMBuildBitCast(builder, face_tdy,
> + lp_build_vec_type(gallivm, coord_bld->type), "");
Maybe put lp_build_vec_type(gallivm, coord_bld->type) in a local var?
> +
> + /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
> + madxdivma = lp_build_mul(coord_bld, madx, imapos);
> + tmp = lp_build_mul(coord_bld, madxdivma, face_s);
> + tmp = lp_build_sub(coord_bld, face_sdx, tmp);
> + derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalfpos);
> +
> + /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
> + tmp = lp_build_mul(coord_bld, madxdivma, face_t);
> + tmp = lp_build_sub(coord_bld, face_tdx, tmp);
> + derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalfpos);
> +
> + /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
> + madydivma = lp_build_mul(coord_bld, mady, imapos);
> + tmp = lp_build_mul(coord_bld, madydivma, face_s);
> + tmp = lp_build_sub(coord_bld, face_sdy, tmp);
> + derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalfpos);
> +
> + /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
> + tmp = lp_build_mul(coord_bld, madydivma, face_t);
> + tmp = lp_build_sub(coord_bld, face_tdy, tmp);
> + derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalfpos);
> +
> + mai = LLVMBuildBitCast(builder, ma, lp_build_vec_type(gallivm, intctype), "");
> + signma = LLVMBuildLShr(builder, mai, signshift, "");
> + coords[2] = LLVMBuildOr(builder, face, signma, "face");
> +
> + /* project coords */
> + face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
> + face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
> +
> + coords[0] = lp_build_add(coord_bld, face_s, posHalf);
> + coords[1] = lp_build_add(coord_bld, face_t, posHalf);
> +
> + return;
> + }
> +
> + else if (need_derivs) {
> LLVMValueRef ddx_ddy[2], tmp[3], rho_vec;
> static const unsigned char swizzle0[] = { /* no-op swizzle */
> 0, LP_BLD_SWIZZLE_DONTCARE,
> @@ -1590,10 +1777,10 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> */
> ma = lp_build_select(coord_bld, as_ge_at, s, t);
> ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
> - ima = lp_build_cube_imapos(coord_bld, ma);
> - s = lp_build_mul(coord_bld, s, ima);
> - t = lp_build_mul(coord_bld, t, ima);
> - r = lp_build_mul(coord_bld, r, ima);
> + imahalfpos = lp_build_cube_imapos(coord_bld, ma);
> + s = lp_build_mul(coord_bld, s, imahalfpos);
> + t = lp_build_mul(coord_bld, t, imahalfpos);
> + r = lp_build_mul(coord_bld, r, imahalfpos);
>
> /*
> * This isn't quite the same as the "ordinary" (3d deriv) path since we
> @@ -1651,14 +1838,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> snewz = LLVMBuildXor(builder, signr, si, "");
> tnewz = tnegi;
>
> - /* XXX on x86 unclear if we should cast the values back to float
> - * or not - on some cpus (nehalem) pblendvb has twice the throughput
> - * of blendvps though on others there just might be domain
> - * transition penalties when using it (this depends on what llvm
> - * will chose for the bit ops above so there appears no "right way",
> - * but given the boatload of selects let's just use the int type).
> - */
> -
> /* select/mirror */
> if (!need_derivs) {
> ma = lp_build_select(coord_bld, as_ge_at, s, t);
> @@ -1690,9 +1869,9 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
>
> /* project coords */
> if (!need_derivs) {
> - ima = lp_build_cube_imapos(coord_bld, ma);
> - face_s = lp_build_mul(coord_bld, face_s, ima);
> - face_t = lp_build_mul(coord_bld, face_t, ima);
> + imahalfpos = lp_build_cube_imapos(coord_bld, ma);
> + face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
> + face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
> }
>
> coords[0] = lp_build_add(coord_bld, face_s, posHalf);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index 803a99e..70f0350 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -457,8 +457,9 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
> void
> lp_build_cube_lookup(struct lp_build_sample_context *bld,
> LLVMValueRef *coords,
> - const struct lp_derivatives *derivs, /* optional */
> + const struct lp_derivatives *derivs_in, /* optional */
> LLVMValueRef *rho,
> + struct lp_derivatives *derivs_out, /* optional */
> boolean need_derivs);
>
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 33378bc..54dee25 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1387,6 +1387,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
> const unsigned target = bld->static_texture_state->target;
> LLVMValueRef first_level, cube_rho = NULL;
> LLVMValueRef lod_ipart = NULL;
> + struct lp_derivatives cube_derivs;
>
> /*
> printf("%s mip %d min %d mag %d\n", __FUNCTION__,
> @@ -1403,7 +1404,8 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
> mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
> !bld->static_sampler_state->min_max_lod_equal &&
> !explicit_lod);
> - lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
> + lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
> + derivs = &cube_derivs;
> }
> else if (target == PIPE_TEXTURE_1D_ARRAY ||
> target == PIPE_TEXTURE_2D_ARRAY) {
> @@ -2163,9 +2165,24 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> * avoided like min and max lod being equal.
> */
> bld.num_mips = bld.num_lods = 1;
> - if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> - (explicit_lod || lod_bias ||
> - (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
> +
> + if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> + (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
> + (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + /*
> + * special case for using per-pixel lod even for implicit lod,
> + * which is generally never required (ok by APIs) except to please
> + * some (somewhat broken imho) tests (because per-pixel face selection
> + * can cause derivatives to be different for pixels outside the primitive
> + * due to the major axis division even if pre-project derivatives are
> + * looking normal).
> + */
> + bld.num_mips = type.length;
> + bld.num_lods = type.length;
> + }
> + else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
> + (explicit_lod || lod_bias || derivs)) {
> if ((is_fetch && target != PIPE_BUFFER) ||
> (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> bld.num_mips = type.length;
> @@ -2371,9 +2388,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> bld4.texel_type.length = 4;
>
> bld4.num_mips = bld4.num_lods = 1;
> + if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> + (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
> + (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + bld4.num_mips = type4.length;
> + bld4.num_lods = type4.length;
> + }
> if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> - (explicit_lod || lod_bias ||
> - (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
> + (explicit_lod || lod_bias || derivs)) {
> if ((is_fetch && target != PIPE_BUFFER) ||
> (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> bld4.num_mips = type4.length;
>
More information about the mesa-dev
mailing list