[Mesa-dev] [PATCH 2/3] gallivm: handle explicit derivatives for cubemaps

Thu Oct 3 15:23:59 PDT 2013

Am 03.10.2013 21:39, schrieb Brian Paul:
> On 10/03/2013 09:42 AM, sroland at vmware.com wrote:
>> From: Roland Scheidegger <sroland at vmware.com>
>>
>> They need some special handling. Quite complicated.
>> Additionally, use the same code for implicit derivatives too if
>> no_rho_approx
>> and no_quad_lod is set, because it seems while generally it should be ok
>> to use per quad lod for implicit derivatives there's at least some
>> test which
>> insists that in case of cubemaps the shared lod value MUST come from a
>> pixel
>> inside the primitive (due to the derivatives becoming different if a
>> different
>> larger major axis is chosen).
>> ---
>>   src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  221
>> +++++++++++++++++++--
>>   src/gallium/auxiliary/gallivm/lp_bld_sample.h     |    3 +-
>>   src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   35 +++-
>>   3 files changed, 231 insertions(+), 28 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
>> index ea6bec7..ce05522 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
>> @@ -273,7 +273,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>>         cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
>>         rho = lp_build_mul(rho_bld, cubesize, rho);
>>      }
>> -   else if (derivs && !(bld->static_texture_state->target ==
>> PIPE_TEXTURE_CUBE)) {
>> +   else if (derivs) {
>>         LLVMValueRef ddmax[3], ddx[3], ddy[3];
>>         for (i = 0; i < dims; i++) {
>>            LLVMValueRef floatdim;
>> @@ -1488,8 +1488,9 @@ lp_build_cube_face(struct
>> lp_build_sample_context *bld,
>>   void
>>   lp_build_cube_lookup(struct lp_build_sample_context *bld,
>>                        LLVMValueRef *coords,
>> -                     const struct lp_derivatives *derivs, /* optional */
>> +                     const struct lp_derivatives *derivs_in, /*
>> optional */
>>                        LLVMValueRef *rho,
>> +                     struct lp_derivatives *derivs_out, /* optional */
>>                        boolean need_derivs)
>>   {
>>      struct lp_build_context *coord_bld = &bld->coord_bld;
>> @@ -1512,8 +1513,6 @@ lp_build_cube_lookup(struct
>> lp_build_sample_context *bld,
>>          * the edge). Still this is possibly a win over just selecting
>> the same face
>>          * for all pixels. Unfortunately, something like that doesn't
>> work for
>>          * explicit derivatives.
>> -       * TODO: handle explicit derivatives by transforming them
>> alongside coords
>> -       * somehow.
>>          */
>>         struct lp_build_context *cint_bld = &bld->int_coord_bld;
>>         struct lp_type intctype = cint_bld->type;
>> @@ -1522,7 +1521,7 @@ lp_build_cube_lookup(struct
>> lp_build_sample_context *bld,
>>         LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
>>         LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
>>         LLVMValueRef tnegi, rnegi;
>> -      LLVMValueRef ma, mai, ima;
>> +      LLVMValueRef ma, mai, imahalfpos;
>>         LLVMValueRef posHalf = lp_build_const_vec(gallivm,
>> coord_bld->type, 0.5);
>>         LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
>>                                                        1 <<
>> (intctype.width - 1));
>> @@ -1561,7 +1560,195 @@ lp_build_cube_lookup(struct
>> lp_build_sample_context *bld,
>>         maxasat = lp_build_max(coord_bld, as, at);
>>         ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar,
>> maxasat);
>>
>> -      if (need_derivs) {
>> +      if (need_derivs && (derivs_in ||
>> +          ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
>> +           (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
>> +         /*
>> +          * XXX: This is really really complex.
>> +          * It is a bit overkill to use this for implicit derivatives
>> as well,
>> +          * no way this is worth the cost in practice, but seems to
>> be the
>> +          * only way for getting accurate and per-pixel lod values.
>> +          */
>> +         LLVMValueRef imapos, tmp, ddx[3], ddy[3];
>> +         LLVMValueRef madx, mady, madxdivma, madydivma;
>> +         LLVMValueRef sdxi, tdxi, rdxi, signsdx, signtdx, signrdx;
>> +         LLVMValueRef sdyi, tdyi, rdyi, signsdy, signtdy, signrdy;
>> +         LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
>> +         LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy,
>> tdxnewz;
>> +         LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy,
>> tdynewz;
>> +         LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
>> +         LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm,
>> +                                                   coord_bld->type,
>> 0.5);
>> +         /*
>> +          * s = 1/2 * ( sc / ma + 1)
>> +          * t = 1/2 * ( tc / ma + 1)
>> +          *
>> +          * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
>> +          * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
>> +          *
>> +          * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
>> +          * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
>> +          * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
>> +          * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
>> +          */
>> +
>> +         /* select ma, calculate ima */
>> +         ma = lp_build_select(coord_bld, as_ge_at, s, t);
>> +         ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
>> +         imapos = lp_build_abs(coord_bld, ma);
>> +         imapos = lp_build_div(coord_bld, coord_bld->one, imapos);
>> +         imahalfpos = lp_build_mul(coord_bld, posHalf, imapos);
>> +
>> +         if (!derivs_in) {
>> +            ddx[0] = lp_build_ddx(coord_bld, s);
>> +            ddx[1] = lp_build_ddx(coord_bld, t);
>> +            ddx[2] = lp_build_ddx(coord_bld, r);
>> +            ddy[0] = lp_build_ddy(coord_bld, s);
>> +            ddy[1] = lp_build_ddy(coord_bld, t);
>> +            ddy[2] = lp_build_ddy(coord_bld, r);
>> +         }
>> +         else {
>> +            ddx[0] = derivs_in->ddx[0];
>> +            ddx[1] = derivs_in->ddx[1];
>> +            ddx[2] = derivs_in->ddx[2];
>> +            ddy[0] = derivs_in->ddy[0];
>> +            ddy[1] = derivs_in->ddy[1];
>> +            ddy[2] = derivs_in->ddy[2];
>> +         }
>> +
>> +         /* select major derivatives */
>> +         madx = lp_build_select(coord_bld, as_ge_at, ddx[0], ddx[1]);
>> +         madx = lp_build_select(coord_bld, ar_ge_as_at, ddx[2], madx);
>> +         madx = lp_build_abs(coord_bld, madx);
> 
> There are quite a few of these double lp_build_select() sequences here
> and below.
> 
> It might be nice to wrap this up in a utility function along the lines of:
> 
>     select3(c1, c2, x, y, z)
>     {
>         if c1, return x;
>         else if c2, return y;
>         else return z;
>     }
> 
> It would only reduce two lines to one, but it might be more readable.

Yes, the duplication is a result of derivatives being treated the same as
coords, so all the logic for coord mirroring/selection is mostly repeated.
Something like add3 would be quite intuitive (though I don't think there
would be that many callers), but I'm not so sure about select3 - without
looking up the function it isn't really obvious what will be selected
depending on the conditions.

> 
> 
>> +         mady = lp_build_select(coord_bld, as_ge_at, ddy[0], ddy[1]);
>> +         mady = lp_build_select(coord_bld, ar_ge_as_at, ddy[2], mady);
>> +         mady = lp_build_abs(coord_bld, mady);
>> +
>> +         si = LLVMBuildBitCast(builder, s, lp_build_vec_type(gallivm,
>> intctype), "");
>> +         ti = LLVMBuildBitCast(builder, t, lp_build_vec_type(gallivm,
>> intctype), "");
>> +         ri = LLVMBuildBitCast(builder, r, lp_build_vec_type(gallivm,
>> intctype), "");
>> +         signs = LLVMBuildAnd(builder, si, signmask, "");
>> +         signt = LLVMBuildAnd(builder, ti, signmask, "");
>> +         signr = LLVMBuildAnd(builder, ri, signmask, "");
>> +
>> +         sdxi = LLVMBuildBitCast(builder, ddx[0],
>> lp_build_vec_type(gallivm, intctype), "");
>> +         tdxi = LLVMBuildBitCast(builder, ddx[1],
>> lp_build_vec_type(gallivm, intctype), "");
>> +         rdxi = LLVMBuildBitCast(builder, ddx[2],
>> lp_build_vec_type(gallivm, intctype), "");
>> +         signsdx = LLVMBuildAnd(builder, sdxi, signmask, "");
>> +         signtdx = LLVMBuildAnd(builder, tdxi, signmask, "");
>> +         signrdx = LLVMBuildAnd(builder, rdxi, signmask, "");
>> +
>> +         sdyi = LLVMBuildBitCast(builder, ddy[0],
>> lp_build_vec_type(gallivm, intctype), "");
>> +         tdyi = LLVMBuildBitCast(builder, ddy[1],
>> lp_build_vec_type(gallivm, intctype), "");
>> +         rdyi = LLVMBuildBitCast(builder, ddy[2],
>> lp_build_vec_type(gallivm, intctype), "");
>> +         signsdy = LLVMBuildAnd(builder, sdyi, signmask, "");
>> +         signtdy = LLVMBuildAnd(builder, tdyi, signmask, "");
>> +         signrdy = LLVMBuildAnd(builder, rdyi, signmask, "");
> 
> Lots of similar lines of code.  Would a get_sign(vec) helper be useful?
Yes, though I think I'd rather name it get_sign_bit().

> 
> And maybe put lp_build_vec_type(gallivm, intctype), in a local var?
Yes.

> 
> 
> 
>> +         /*
>> +          * compute all possible new s/t coords, same for derivs
>> +          * snewx = signs * -r;
>> +          * tnewx = -t;
>> +          * snewy = s;
>> +          * tnewy = signt * r;
>> +          * snewz = signr * s;
>> +          * tnewz = -t;
>> +          */
>> +         tnegi = LLVMBuildXor(builder, ti, signmask, "");
>> +         rnegi = LLVMBuildXor(builder, ri, signmask, "");
>> +         tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
>> +         rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
>> +         tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
>> +         rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
>> +
>> +         snewx = LLVMBuildXor(builder, signs, rnegi, "");
>> +         tnewx = tnegi;
>> +         sdxnewx = LLVMBuildXor(builder, signsdx, rdxnegi, "");
>> +         tdxnewx = tdxnegi;
>> +         sdynewx = LLVMBuildXor(builder, signsdy, rdynegi, "");
>> +         tdynewx = tdynegi;
>> +
>> +         snewy = si;
>> +         tnewy = LLVMBuildXor(builder, signt, ri, "");
>> +         sdxnewy = sdxi;
>> +         tdxnewy = LLVMBuildXor(builder, signtdx, rdxi, "");
>> +         sdynewy = sdyi;
>> +         tdynewy = LLVMBuildXor(builder, signtdy, rdyi, "");
>> +
>> +         snewz = LLVMBuildXor(builder, signr, si, "");
>> +         tnewz = tnegi;
>> +         sdxnewz = LLVMBuildXor(builder, signrdx, sdxi, "");
>> +         tdxnewz = tdxnegi;
>> +         sdynewz = LLVMBuildXor(builder, signrdy, sdyi, "");
>> +         tdynewz = tdynegi;
>> +
>> +         /* select/mirror */
>> +         face_s = lp_build_select(cint_bld, as_ge_at, snewx, snewy);
>> +         face_t = lp_build_select(cint_bld, as_ge_at, tnewx, tnewy);
>> +         face = lp_build_select(cint_bld, as_ge_at, facex, facey);
>> +         face_sdx = lp_build_select(cint_bld, as_ge_at, sdxnewx,
>> sdxnewy);
>> +         face_tdx = lp_build_select(cint_bld, as_ge_at, tdxnewx,
>> tdxnewy);
>> +         face_sdy = lp_build_select(cint_bld, as_ge_at, sdynewx,
>> sdynewy);
>> +         face_tdy = lp_build_select(cint_bld, as_ge_at, tdynewx,
>> tdynewy);
>> +
>> +         face_s = lp_build_select(cint_bld, ar_ge_as_at, snewz, face_s);
>> +         face_t = lp_build_select(cint_bld, ar_ge_as_at, tnewz, face_t);
>> +         face = lp_build_select(cint_bld, ar_ge_as_at, facez, face);
>> +         face_sdx = lp_build_select(cint_bld, ar_ge_as_at, sdxnewz,
>> face_sdx);
>> +         face_tdx = lp_build_select(cint_bld, ar_ge_as_at, tdxnewz,
>> face_tdx);
>> +         face_sdy = lp_build_select(cint_bld, ar_ge_as_at, sdynewz,
>> face_sdy);
>> +         face_tdy = lp_build_select(cint_bld, ar_ge_as_at, tdynewz,
>> face_tdy);
>> +
>> +         face_s = LLVMBuildBitCast(builder, face_s,
>> +                                  lp_build_vec_type(gallivm,
>> coord_bld->type), "");
>> +         face_t = LLVMBuildBitCast(builder, face_t,
>> +                                  lp_build_vec_type(gallivm,
>> coord_bld->type), "");
>> +         face_sdx = LLVMBuildBitCast(builder, face_sdx,
>> +                                     lp_build_vec_type(gallivm,
>> coord_bld->type), "");
>> +         face_tdx = LLVMBuildBitCast(builder, face_tdx,
>> +                                     lp_build_vec_type(gallivm,
>> coord_bld->type), "");
>> +         face_sdy = LLVMBuildBitCast(builder, face_sdy,
>> +                                     lp_build_vec_type(gallivm,
>> coord_bld->type), "");
>> +         face_tdy = LLVMBuildBitCast(builder, face_tdy,
>> +                                     lp_build_vec_type(gallivm,
>> coord_bld->type), "");
> 
> Maybe put lp_build_vec_type(gallivm, coord_bld->type) in a local var?
Yes, that's a good idea. Actually we already have the type
(coord_bld->vec_type), not sure why I was calling the build_vec_type()
stuff at all...

> 
> 
>> +
>> +         /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
>> +         madxdivma = lp_build_mul(coord_bld, madx, imapos);
>> +         tmp = lp_build_mul(coord_bld, madxdivma, face_s);
>> +         tmp = lp_build_sub(coord_bld, face_sdx, tmp);
>> +         derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalfpos);
>> +
>> +         /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
>> +         tmp = lp_build_mul(coord_bld, madxdivma, face_t);
>> +         tmp = lp_build_sub(coord_bld, face_tdx, tmp);
>> +         derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalfpos);
>> +
>> +         /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
>> +         madydivma = lp_build_mul(coord_bld, mady, imapos);
>> +         tmp = lp_build_mul(coord_bld, madydivma, face_s);
>> +         tmp = lp_build_sub(coord_bld, face_sdy, tmp);
>> +         derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalfpos);
>> +
>> +         /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
>> +         tmp = lp_build_mul(coord_bld, madydivma, face_t);
>> +         tmp = lp_build_sub(coord_bld, face_tdy, tmp);
>> +         derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalfpos);
>> +
>> +         mai = LLVMBuildBitCast(builder, ma,
>> lp_build_vec_type(gallivm, intctype), "");
>> +         signma = LLVMBuildLShr(builder, mai, signshift, "");
>> +         coords[2] = LLVMBuildOr(builder, face, signma, "face");
>> +
>> +         /* project coords */
>> +         face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
>> +         face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
>> +
>> +         coords[0] = lp_build_add(coord_bld, face_s, posHalf);
>> +         coords[1] = lp_build_add(coord_bld, face_t, posHalf);
>> +
>> +         return;
>> +      }
>> +
>> +      else if (need_derivs) {
>>            LLVMValueRef ddx_ddy[2], tmp[3], rho_vec;
>>            static const unsigned char swizzle0[] = { /* no-op swizzle */
>>               0, LP_BLD_SWIZZLE_DONTCARE,
>> @@ -1590,10 +1777,10 @@ lp_build_cube_lookup(struct
>> lp_build_sample_context *bld,
>>             */
>>            ma = lp_build_select(coord_bld, as_ge_at, s, t);
>>            ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
>> -         ima = lp_build_cube_imapos(coord_bld, ma);
>> -         s = lp_build_mul(coord_bld, s, ima);
>> -         t = lp_build_mul(coord_bld, t, ima);
>> -         r = lp_build_mul(coord_bld, r, ima);
>> +         imahalfpos = lp_build_cube_imapos(coord_bld, ma);
>> +         s = lp_build_mul(coord_bld, s, imahalfpos);
>> +         t = lp_build_mul(coord_bld, t, imahalfpos);
>> +         r = lp_build_mul(coord_bld, r, imahalfpos);
>>
>>            /*
>>             * This isn't quite the same as the "ordinary" (3d deriv)
>> path since we
>> @@ -1651,14 +1838,6 @@ lp_build_cube_lookup(struct
>> lp_build_sample_context *bld,
>>         snewz = LLVMBuildXor(builder, signr, si, "");
>>         tnewz = tnegi;
>>
>> -      /* XXX on x86 unclear if we should cast the values back to float
>> -       * or not - on some cpus (nehalem) pblendvb has twice the
>> throughput
>> -       * of blendvps though on others there just might be domain
>> -       * transition penalties when using it (this depends on what llvm
>> -       * will chose for the bit ops above so there appears no "right
>> way",
>> -       * but given the boatload of selects let's just use the int type).
>> -       */
>> -
>>         /* select/mirror */
>>         if (!need_derivs) {
>>            ma = lp_build_select(coord_bld, as_ge_at, s, t);
>> @@ -1690,9 +1869,9 @@ lp_build_cube_lookup(struct
>> lp_build_sample_context *bld,
>>
>>         /* project coords */
>>         if (!need_derivs) {
>> -         ima = lp_build_cube_imapos(coord_bld, ma);
>> -         face_s = lp_build_mul(coord_bld, face_s, ima);
>> -         face_t = lp_build_mul(coord_bld, face_t, ima);
>> +         imahalfpos = lp_build_cube_imapos(coord_bld, ma);
>> +         face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
>> +         face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
>>         }
>>
>>         coords[0] = lp_build_add(coord_bld, face_s, posHalf);
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
>> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
>> index 803a99e..70f0350 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
>> @@ -457,8 +457,9 @@ lp_build_unnormalized_coords(struct
>> lp_build_sample_context *bld,
>>   void
>>   lp_build_cube_lookup(struct lp_build_sample_context *bld,
>>                        LLVMValueRef *coords,
>> -                     const struct lp_derivatives *derivs, /* optional */
>> +                     const struct lp_derivatives *derivs_in, /*
>> optional */
>>                        LLVMValueRef *rho,
>> +                     struct lp_derivatives *derivs_out, /* optional */
>>                        boolean need_derivs);
>>
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
>> index 33378bc..54dee25 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
>> @@ -1387,6 +1387,7 @@ lp_build_sample_common(struct
>> lp_build_sample_context *bld,
>>      const unsigned target = bld->static_texture_state->target;
>>      LLVMValueRef first_level, cube_rho = NULL;
>>      LLVMValueRef lod_ipart = NULL;
>> +   struct lp_derivatives cube_derivs;
>>
>>      /*
>>      printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
>> @@ -1403,7 +1404,8 @@ lp_build_sample_common(struct
>> lp_build_sample_context *bld,
>>                         mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
>>                         !bld->static_sampler_state->min_max_lod_equal &&
>>                         !explicit_lod);
>> -      lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
>> +      lp_build_cube_lookup(bld, coords, derivs, &cube_rho,
>> &cube_derivs, need_derivs);
>> +      derivs = &cube_derivs;
>>      }
>>      else if (target == PIPE_TEXTURE_1D_ARRAY ||
>>               target == PIPE_TEXTURE_2D_ARRAY) {
>> @@ -2163,9 +2165,24 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>>       * avoided like min and max lod being equal.
>>       */
>>      bld.num_mips = bld.num_lods = 1;
>> -   if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
>> -       (explicit_lod || lod_bias ||
>> -        (derivs && static_texture_state->target !=
>> PIPE_TEXTURE_CUBE))) {
>> +
>> +   if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
>> +       (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
>> +       (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
>> +       (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
>> +      /*
>> +       * special case for using per-pixel lod even for implicit lod,
>> +       * which is generally never required (ok by APIs) except to please
>> +       * some (somewhat broken imho) tests (because per-pixel face
>> selection
>> +       * can cause derivatives to be different for pixels outside the
>> primitive
>> +       * due to the major axis division even if pre-project
>> derivatives are
>> +       * looking normal).
>> +       */
>> +      bld.num_mips = type.length;
>> +      bld.num_lods = type.length;
>> +   }
>> +   else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
>> +       (explicit_lod || lod_bias || derivs)) {
>>         if ((is_fetch && target != PIPE_BUFFER) ||
>>             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
>>            bld.num_mips = type.length;
>> @@ -2371,9 +2388,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>>            bld4.texel_type.length = 4;
>>
>>            bld4.num_mips = bld4.num_lods = 1;
>> +         if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
>> +             (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
>> +             (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
>> +             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
>> +            bld4.num_mips = type4.length;
>> +            bld4.num_lods = type4.length;
>> +         }
>>            if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
>> -             (explicit_lod || lod_bias ||
>> -              (derivs && static_texture_state->target !=
>> PIPE_TEXTURE_CUBE))) {
>> +             (explicit_lod || lod_bias || derivs)) {
>>               if ((is_fetch && target != PIPE_BUFFER) ||
>>                   (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
>>                  bld4.num_mips = type4.length;
>>

I'll actually have to redo this series (well, at least this part; the
others are trivial), as it apparently still doesn't work correctly. It
looks like the math doesn't work out, but I can't find the error.

Roland