[Mesa-dev] [PATCH 2/3] gallivm: handle explicit derivatives for cubemaps
Jose Fonseca
jfonseca at vmware.com
Wed Oct 9 14:16:29 PDT 2013
Looks good to me. The v2 changes seem like the right ones.
Jose
----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
>
> Explicit derivatives for cubemaps need some special handling, which is
> quite complicated.
> Additionally, use the same code for implicit derivatives too if both
> no_rho_approx and no_quad_lod are set: while it should generally be ok to
> use a per-quad lod for implicit derivatives, there is at least one test
> which insists that for cubemaps the shared lod value MUST come from a pixel
> inside the primitive (because the derivatives become different if a
> different, larger major axis is chosen).
>
> v2: based on Brian's feedback, clean up the code a bit.
> Also use the sign bit of the major axis instead of the pre-selected s/t/r
> sign for coord mirroring (which should give the same result in the end and
> saves 2 ands).
> Also fix two bugs in the select/mirror of the derivatives: the minor axes
> need to use the major axis sign as well (instead of the sign of the major
> axis derivative), and don't mistakenly use absolute values of the major
> derivative and the inverse major axis.
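
(For illustration only, and not part of the patch: a minimal scalar C sketch
of the sign-bit mirroring trick mentioned above. XORing the major axis' sign
bit onto a coordinate conditionally negates it, which is what the
snewx/tnewy/snewz computations do vector-wide. The names below are made up.)

    #include <stdint.h>
    #include <string.h>

    /* Mirror 'coord' by the sign of the major axis 'ma':
     * returns ma < 0.0f ? -coord : coord, i.e. signma * coord,
     * without a compare/select, by XORing in ma's sign bit. */
    static float mirror_by_major_sign(float coord, float ma)
    {
       const uint32_t signmask = 1u << 31;
       uint32_t ci, mi;
       memcpy(&ci, &coord, sizeof ci);
       memcpy(&mi, &ma, sizeof mi);
       ci ^= mi & signmask;        /* flip coord's sign iff ma is negative */
       memcpy(&coord, &ci, sizeof coord);
       return coord;
    }
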
> ---
> src/gallium/auxiliary/gallivm/lp_bld_sample.c | 253 +++++++++++++++++----
> src/gallium/auxiliary/gallivm/lp_bld_sample.h | 3 +-
> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 35 ++-
> 3 files changed, 235 insertions(+), 56 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index dc593aa..39c3a2f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -273,7 +273,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
> rho = lp_build_mul(rho_bld, cubesize, rho);
> }
> - else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
> + else if (derivs) {
> LLVMValueRef ddmax[3], ddx[3], ddy[3];
> for (i = 0; i < dims; i++) {
> LLVMValueRef floatdim;
> @@ -1481,6 +1481,21 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
> }
>
>
> +/** Helper for doing 3-wise selection.
> + * Returns sel1 ? val2 : (sel0 ? val0 : val1).
> + */
> +static LLVMValueRef
> +lp_build_select3(struct lp_build_context *sel_bld,
> + LLVMValueRef sel0,
> + LLVMValueRef sel1,
> + LLVMValueRef val0,
> + LLVMValueRef val1,
> + LLVMValueRef val2)
> +{
> + LLVMValueRef tmp;
> + tmp = lp_build_select(sel_bld, sel0, val0, val1);
> + return lp_build_select(sel_bld, sel1, val2, tmp);
> +}
>
> /**
> * Generate code to do cube face selection and compute per-face texcoords.
> @@ -1488,8 +1503,9 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
> void
> lp_build_cube_lookup(struct lp_build_sample_context *bld,
> LLVMValueRef *coords,
> - const struct lp_derivatives *derivs, /* optional */
> + const struct lp_derivatives *derivs_in, /* optional */
> LLVMValueRef *rho,
> + struct lp_derivatives *derivs_out, /* optional */
> boolean need_derivs)
> {
> struct lp_build_context *coord_bld = &bld->coord_bld;
> @@ -1512,19 +1528,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> * the edge). Still this is possibly a win over just selecting the same face
> * for all pixels. Unfortunately, something like that doesn't work for
> * explicit derivatives.
> - * TODO: handle explicit derivatives by transforming them alongside coords
> - * somehow.
> */
> struct lp_build_context *cint_bld = &bld->int_coord_bld;
> struct lp_type intctype = cint_bld->type;
> LLVMTypeRef coord_vec_type = coord_bld->vec_type;
> LLVMTypeRef cint_vec_type = cint_bld->vec_type;
> - LLVMValueRef signs, signt, signr, signma;
> LLVMValueRef as, at, ar, face, face_s, face_t;
> LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
> LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
> LLVMValueRef tnegi, rnegi;
> - LLVMValueRef ma, mai, ima;
> + LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
> LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
> LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
> 1 << (intctype.width - 1));
> @@ -1563,7 +1576,166 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> maxasat = lp_build_max(coord_bld, as, at);
> ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
>
> - if (need_derivs) {
> + if (need_derivs && (derivs_in ||
> + ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> + (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
> + /*
> + * XXX: This is really really complex.
> + * It is a bit overkill to use this for implicit derivatives as well,
> + * no way this is worth the cost in practice, but seems to be the
> + * only way for getting accurate and per-pixel lod values.
> + */
> + LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
> + LLVMValueRef madx, mady, madxdivma, madydivma;
> + LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
> + LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
> + LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
> + LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
> + LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
> + /*
> + * s = 1/2 * ( sc / ma + 1)
> + * t = 1/2 * ( tc / ma + 1)
> + *
> + * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
> + * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
> + *
> + * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
> + * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
> + * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
> + * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
> + */
> +
> + /* select ma, calculate ima */
> + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> + mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
> + signmabit = LLVMBuildAnd(builder, mai, signmask, "");
> + ima = lp_build_div(coord_bld, coord_bld->one, ma);
> + imahalf = lp_build_mul(coord_bld, posHalf, ima);
> + imahalfpos = lp_build_abs(coord_bld, imahalf);
> +
> + if (!derivs_in) {
> + ddx[0] = lp_build_ddx(coord_bld, s);
> + ddx[1] = lp_build_ddx(coord_bld, t);
> + ddx[2] = lp_build_ddx(coord_bld, r);
> + ddy[0] = lp_build_ddy(coord_bld, s);
> + ddy[1] = lp_build_ddy(coord_bld, t);
> + ddy[2] = lp_build_ddy(coord_bld, r);
> + }
> + else {
> + ddx[0] = derivs_in->ddx[0];
> + ddx[1] = derivs_in->ddx[1];
> + ddx[2] = derivs_in->ddx[2];
> + ddy[0] = derivs_in->ddy[0];
> + ddy[1] = derivs_in->ddy[1];
> + ddy[2] = derivs_in->ddy[2];
> + }
> +
> + /* select major derivatives */
> + madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
> + mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
> +
> + si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
> + ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
> + ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
> +
> + sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
> + tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
> + rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
> +
> + sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
> + tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
> + rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
> +
> + /*
> + * compute all possible new s/t coords, which does the mirroring,
> + * and do the same for derivs minor axes.
> + * snewx = signma * -r;
> + * tnewx = -t;
> + * snewy = s;
> + * tnewy = signma * r;
> + * snewz = signma * s;
> + * tnewz = -t;
> + */
> + tnegi = LLVMBuildXor(builder, ti, signmask, "");
> + rnegi = LLVMBuildXor(builder, ri, signmask, "");
> + tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
> + rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
> + tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
> + rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
> +
> + snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
> + tnewx = tnegi;
> + sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
> + tdxnewx = tdxnegi;
> + sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
> + tdynewx = tdynegi;
> +
> + snewy = si;
> + tnewy = LLVMBuildXor(builder, signmabit, ri, "");
> + sdxnewy = sdxi;
> + tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
> + sdynewy = sdyi;
> + tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
> +
> + snewz = LLVMBuildXor(builder, signmabit, si, "");
> + tnewz = tnegi;
> + sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
> + tdxnewz = tdxnegi;
> + sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
> + tdynewz = tdynegi;
> +
> + /* select the mirrored values */
> + face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
> + face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
> + face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
> + face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
> + face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
> + face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
> + face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
> +
> + face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
> + face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
> + face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
> + face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
> + face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
> + face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
> +
> + /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
> + madxdivma = lp_build_mul(coord_bld, madx, ima);
> + tmp = lp_build_mul(coord_bld, madxdivma, face_s);
> + tmp = lp_build_sub(coord_bld, face_sdx, tmp);
> + derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
> +
> + /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
> + tmp = lp_build_mul(coord_bld, madxdivma, face_t);
> + tmp = lp_build_sub(coord_bld, face_tdx, tmp);
> + derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
> +
> + /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
> + madydivma = lp_build_mul(coord_bld, mady, ima);
> + tmp = lp_build_mul(coord_bld, madydivma, face_s);
> + tmp = lp_build_sub(coord_bld, face_sdy, tmp);
> + derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
> +
> + /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
> + tmp = lp_build_mul(coord_bld, madydivma, face_t);
> + tmp = lp_build_sub(coord_bld, face_tdy, tmp);
> + derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
> +
> + signma = LLVMBuildLShr(builder, mai, signshift, "");
> + coords[2] = LLVMBuildOr(builder, face, signma, "face");
> +
> + /* project coords */
> + face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
> + face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
> +
> + coords[0] = lp_build_add(coord_bld, face_s, posHalf);
> + coords[1] = lp_build_add(coord_bld, face_t, posHalf);
> +
> + return;
> + }
> +
> + else if (need_derivs) {
> LLVMValueRef ddx_ddy[2], tmp[3], rho_vec;
> static const unsigned char swizzle0[] = { /* no-op swizzle */
> 0, LP_BLD_SWIZZLE_DONTCARE,
> @@ -1590,12 +1762,11 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> * scale the s/t/r coords pre-select/mirror so we can calculate
> * "reasonable" derivs.
> */
> - ma = lp_build_select(coord_bld, as_ge_at, s, t);
> - ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
> - ima = lp_build_cube_imapos(coord_bld, ma);
> - s = lp_build_mul(coord_bld, s, ima);
> - t = lp_build_mul(coord_bld, t, ima);
> - r = lp_build_mul(coord_bld, r, ima);
> + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> + imahalfpos = lp_build_cube_imapos(coord_bld, ma);
> + s = lp_build_mul(coord_bld, s, imahalfpos);
> + t = lp_build_mul(coord_bld, t, imahalfpos);
> + r = lp_build_mul(coord_bld, r, imahalfpos);
>
> /*
> * This isn't quite the same as the "ordinary" (3d deriv) path since we
> @@ -1625,56 +1796,41 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> *rho = lp_build_max(coord_bld, tmp[0], tmp[1]);
> }
>
> + if (!need_derivs) {
> + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
> + }
> + mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
> + signmabit = LLVMBuildAnd(builder, mai, signmask, "");
> +
> si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
> ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
> ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
> - signs = LLVMBuildAnd(builder, si, signmask, "");
> - signt = LLVMBuildAnd(builder, ti, signmask, "");
> - signr = LLVMBuildAnd(builder, ri, signmask, "");
>
> /*
> - * compute all possible new s/t coords
> - * snewx = signs * -r;
> + * compute all possible new s/t coords, which does the mirroring
> + * snewx = signma * -r;
> * tnewx = -t;
> * snewy = s;
> - * tnewy = signt * r;
> - * snewz = signr * s;
> + * tnewy = signma * r;
> + * snewz = signma * s;
> * tnewz = -t;
> */
> tnegi = LLVMBuildXor(builder, ti, signmask, "");
> rnegi = LLVMBuildXor(builder, ri, signmask, "");
>
> - snewx = LLVMBuildXor(builder, signs, rnegi, "");
> + snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
> tnewx = tnegi;
>
> snewy = si;
> - tnewy = LLVMBuildXor(builder, signt, ri, "");
> + tnewy = LLVMBuildXor(builder, signmabit, ri, "");
>
> - snewz = LLVMBuildXor(builder, signr, si, "");
> + snewz = LLVMBuildXor(builder, signmabit, si, "");
> tnewz = tnegi;
>
> - /* XXX on x86 unclear if we should cast the values back to float
> - * or not - on some cpus (nehalem) pblendvb has twice the throughput
> - * of blendvps though on others there just might be domain
> - * transition penalties when using it (this depends on what llvm
> - * will chose for the bit ops above so there appears no "right way",
> - * but given the boatload of selects let's just use the int type).
> - */
> -
> - /* select/mirror */
> - if (!need_derivs) {
> - ma = lp_build_select(coord_bld, as_ge_at, s, t);
> - }
> - face_s = lp_build_select(cint_bld, as_ge_at, snewx, snewy);
> - face_t = lp_build_select(cint_bld, as_ge_at, tnewx, tnewy);
> - face = lp_build_select(cint_bld, as_ge_at, facex, facey);
> -
> - if (!need_derivs) {
> - ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
> - }
> - face_s = lp_build_select(cint_bld, ar_ge_as_at, snewz, face_s);
> - face_t = lp_build_select(cint_bld, ar_ge_as_at, tnewz, face_t);
> - face = lp_build_select(cint_bld, ar_ge_as_at, facez, face);
> + /* select the mirrored values */
> + face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
> + face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
> + face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
>
> face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
> face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
> @@ -1684,15 +1840,14 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
> * as long as we ensure vblendvps gets used we can actually
> * skip the comparison and just use sign as a "mask" directly.
> */
> - mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
> signma = LLVMBuildLShr(builder, mai, signshift, "");
> coords[2] = LLVMBuildOr(builder, face, signma, "face");
>
> /* project coords */
> if (!need_derivs) {
> - ima = lp_build_cube_imapos(coord_bld, ma);
> - face_s = lp_build_mul(coord_bld, face_s, ima);
> - face_t = lp_build_mul(coord_bld, face_t, ima);
> + imahalfpos = lp_build_cube_imapos(coord_bld, ma);
> + face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
> + face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
> }
>
> coords[0] = lp_build_add(coord_bld, face_s, posHalf);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index 803a99e..70f0350 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -457,8 +457,9 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
> void
> lp_build_cube_lookup(struct lp_build_sample_context *bld,
> LLVMValueRef *coords,
> - const struct lp_derivatives *derivs, /* optional */
> + const struct lp_derivatives *derivs_in, /* optional */
> LLVMValueRef *rho,
> + struct lp_derivatives *derivs_out, /* optional */
> boolean need_derivs);
>
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 33378bc..54dee25 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1387,6 +1387,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
> const unsigned target = bld->static_texture_state->target;
> LLVMValueRef first_level, cube_rho = NULL;
> LLVMValueRef lod_ipart = NULL;
> + struct lp_derivatives cube_derivs;
>
> /*
> printf("%s mip %d min %d mag %d\n", __FUNCTION__,
> @@ -1403,7 +1404,8 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
> mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
> !bld->static_sampler_state->min_max_lod_equal &&
> !explicit_lod);
> - lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
> + lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
> + derivs = &cube_derivs;
> }
> else if (target == PIPE_TEXTURE_1D_ARRAY ||
> target == PIPE_TEXTURE_2D_ARRAY) {
> @@ -2163,9 +2165,24 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> */
> bld.num_mips = bld.num_lods = 1;
> - if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> - (explicit_lod || lod_bias ||
> - (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
> +
> + if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> + (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
> + (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + /*
> + * special case for using per-pixel lod even for implicit lod,
> + * which is generally never required (ok by APIs) except to please
> + * some (somewhat broken imho) tests (because per-pixel face selection
> + * can cause derivatives to be different for pixels outside the primitive
> + * due to the major axis division even if pre-project derivatives are
> + * looking normal).
> + */
> + bld.num_mips = type.length;
> + bld.num_lods = type.length;
> + }
> + else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> + (explicit_lod || lod_bias || derivs)) {
> if ((is_fetch && target != PIPE_BUFFER) ||
> (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> bld.num_mips = type.length;
> @@ -2371,9 +2388,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>
> bld4.num_mips = bld4.num_lods = 1;
> + if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
> + (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
> + (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + bld4.num_mips = type4.length;
> + bld4.num_lods = type4.length;
> + }
> if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> - (explicit_lod || lod_bias ||
> - (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
> + (explicit_lod || lod_bias || derivs)) {
> if ((is_fetch && target != PIPE_BUFFER) ||
> (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> bld4.num_mips = type4.length;
> --
> 1.7.9.5
>
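
(Again for illustration only, not part of the patch: a scalar C sketch of the
derivative math in the new cube path, i.e. the quotient rule applied to
s = 0.5 * (sc / ma + 1), which gives dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
as stated in the patch's comment. The function and parameter names are made up.)

    /* Per-face texcoord derivative for one pixel:
     *   s(x)  = 0.5 * (sc(x) / ma(x) + 1)
     *   ds/dx = 0.5 * (dsc/dx * ma - sc * dma/dx) / ma^2
     *         = 0.5 * (dsc/dx - sc * dma/dx / ma) / ma
     * where sc is the (already mirrored) face coordinate and ma the major axis. */
    static float cube_face_coord_deriv(float sc, float dsc_dx,
                                       float ma, float dma_dx)
    {
       const float ima = 1.0f / ma;
       return 0.5f * (dsc_dx - sc * dma_dx * ima) * ima;
    }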