[Mesa-dev] [PATCH 2/3] gallivm: handle explicit derivatives for cubemaps

sroland at vmware.com sroland at vmware.com
Thu Oct 3 08:42:21 PDT 2013


From: Roland Scheidegger <sroland at vmware.com>

Explicit derivatives for cubemaps need some special handling, which is quite
complicated.
Additionally, use the same code for implicit derivatives too if both
no_rho_approx and no_quad_lod are set. While it should generally be ok to use
a per-quad lod for implicit derivatives, there is at least one test which
insists that, in the case of cubemaps, the shared lod value MUST come from a
pixel inside the primitive (because the derivatives become different if a
different, larger major axis is chosen).
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  221 +++++++++++++++++++--
 src/gallium/auxiliary/gallivm/lp_bld_sample.h     |    3 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   35 +++-
 3 files changed, 231 insertions(+), 28 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index ea6bec7..ce05522 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -273,7 +273,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
       cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
       rho = lp_build_mul(rho_bld, cubesize, rho);
    }
-   else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
+   else if (derivs) {
       LLVMValueRef ddmax[3], ddx[3], ddy[3];
       for (i = 0; i < dims; i++) {
          LLVMValueRef floatdim;
@@ -1488,8 +1488,9 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
 void
 lp_build_cube_lookup(struct lp_build_sample_context *bld,
                      LLVMValueRef *coords,
-                     const struct lp_derivatives *derivs, /* optional */
+                     const struct lp_derivatives *derivs_in, /* optional */
                      LLVMValueRef *rho,
+                     struct lp_derivatives *derivs_out, /* optional */
                      boolean need_derivs)
 {
    struct lp_build_context *coord_bld = &bld->coord_bld;
@@ -1512,8 +1513,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
        * the edge). Still this is possibly a win over just selecting the same face
        * for all pixels. Unfortunately, something like that doesn't work for
        * explicit derivatives.
-       * TODO: handle explicit derivatives by transforming them alongside coords
-       * somehow.
        */
       struct lp_build_context *cint_bld = &bld->int_coord_bld;
       struct lp_type intctype = cint_bld->type;
@@ -1522,7 +1521,7 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
       LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
       LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
       LLVMValueRef tnegi, rnegi;
-      LLVMValueRef ma, mai, ima;
+      LLVMValueRef ma, mai, imahalfpos;
       LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
       LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
                                                      1 << (intctype.width - 1));
@@ -1561,7 +1560,195 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
       maxasat = lp_build_max(coord_bld, as, at);
       ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
 
-      if (need_derivs) {
+      if (need_derivs && (derivs_in ||
+          ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
+           (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
+         /*
+          * XXX: This is really really complex.
+          * It is a bit overkill to use this for implicit derivatives as well,
+          * no way this is worth the cost in practice, but seems to be the
+          * only way for getting accurate and per-pixel lod values.
+          */
+         LLVMValueRef imapos, tmp, ddx[3], ddy[3];
+         LLVMValueRef madx, mady, madxdivma, madydivma;
+         LLVMValueRef sdxi, tdxi, rdxi, signsdx, signtdx, signrdx;
+         LLVMValueRef sdyi, tdyi, rdyi, signsdy, signtdy, signrdy;
+         LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
+         LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
+         LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
+         LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
+         LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm,
+                                                   coord_bld->type, 0.5);
+         /*
+          * s = 1/2 * ( sc / ma + 1)
+          * t = 1/2 * ( tc / ma + 1)
+          *
+          * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
+          * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
+          *
+          * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
+          * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
+          * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
+          * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
+          */
+
+         /* select ma, calculate ima */
+         ma = lp_build_select(coord_bld, as_ge_at, s, t);
+         ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
+         imapos = lp_build_abs(coord_bld, ma);
+         imapos = lp_build_div(coord_bld, coord_bld->one, imapos);
+         imahalfpos = lp_build_mul(coord_bld, posHalf, imapos);
+
+         if (!derivs_in) {
+            ddx[0] = lp_build_ddx(coord_bld, s);
+            ddx[1] = lp_build_ddx(coord_bld, t);
+            ddx[2] = lp_build_ddx(coord_bld, r);
+            ddy[0] = lp_build_ddy(coord_bld, s);
+            ddy[1] = lp_build_ddy(coord_bld, t);
+            ddy[2] = lp_build_ddy(coord_bld, r);
+         }
+         else {
+            ddx[0] = derivs_in->ddx[0];
+            ddx[1] = derivs_in->ddx[1];
+            ddx[2] = derivs_in->ddx[2];
+            ddy[0] = derivs_in->ddy[0];
+            ddy[1] = derivs_in->ddy[1];
+            ddy[2] = derivs_in->ddy[2];
+         }
+
+         /* select major derivatives */
+         madx = lp_build_select(coord_bld, as_ge_at, ddx[0], ddx[1]);
+         madx = lp_build_select(coord_bld, ar_ge_as_at, ddx[2], madx);
+         madx = lp_build_abs(coord_bld, madx);
+         mady = lp_build_select(coord_bld, as_ge_at, ddy[0], ddy[1]);
+         mady = lp_build_select(coord_bld, ar_ge_as_at, ddy[2], mady);
+         mady = lp_build_abs(coord_bld, mady);
+
+         si = LLVMBuildBitCast(builder, s, lp_build_vec_type(gallivm, intctype), "");
+         ti = LLVMBuildBitCast(builder, t, lp_build_vec_type(gallivm, intctype), "");
+         ri = LLVMBuildBitCast(builder, r, lp_build_vec_type(gallivm, intctype), "");
+         signs = LLVMBuildAnd(builder, si, signmask, "");
+         signt = LLVMBuildAnd(builder, ti, signmask, "");
+         signr = LLVMBuildAnd(builder, ri, signmask, "");
+
+         sdxi = LLVMBuildBitCast(builder, ddx[0], lp_build_vec_type(gallivm, intctype), "");
+         tdxi = LLVMBuildBitCast(builder, ddx[1], lp_build_vec_type(gallivm, intctype), "");
+         rdxi = LLVMBuildBitCast(builder, ddx[2], lp_build_vec_type(gallivm, intctype), "");
+         signsdx = LLVMBuildAnd(builder, sdxi, signmask, "");
+         signtdx = LLVMBuildAnd(builder, tdxi, signmask, "");
+         signrdx = LLVMBuildAnd(builder, rdxi, signmask, "");
+
+         sdyi = LLVMBuildBitCast(builder, ddy[0], lp_build_vec_type(gallivm, intctype), "");
+         tdyi = LLVMBuildBitCast(builder, ddy[1], lp_build_vec_type(gallivm, intctype), "");
+         rdyi = LLVMBuildBitCast(builder, ddy[2], lp_build_vec_type(gallivm, intctype), "");
+         signsdy = LLVMBuildAnd(builder, sdyi, signmask, "");
+         signtdy = LLVMBuildAnd(builder, tdyi, signmask, "");
+         signrdy = LLVMBuildAnd(builder, rdyi, signmask, "");
+
+         /*
+          * compute all possible new s/t coords, same for derivs
+          * snewx = signs * -r;
+          * tnewx = -t;
+          * snewy = s;
+          * tnewy = signt * r;
+          * snewz = signr * s;
+          * tnewz = -t;
+          */
+         tnegi = LLVMBuildXor(builder, ti, signmask, "");
+         rnegi = LLVMBuildXor(builder, ri, signmask, "");
+         tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
+         rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
+         tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
+         rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
+
+         snewx = LLVMBuildXor(builder, signs, rnegi, "");
+         tnewx = tnegi;
+         sdxnewx = LLVMBuildXor(builder, signsdx, rdxnegi, "");
+         tdxnewx = tdxnegi;
+         sdynewx = LLVMBuildXor(builder, signsdy, rdynegi, "");
+         tdynewx = tdynegi;
+
+         snewy = si;
+         tnewy = LLVMBuildXor(builder, signt, ri, "");
+         sdxnewy = sdxi;
+         tdxnewy = LLVMBuildXor(builder, signtdx, rdxi, "");
+         sdynewy = sdyi;
+         tdynewy = LLVMBuildXor(builder, signtdy, rdyi, "");
+
+         snewz = LLVMBuildXor(builder, signr, si, "");
+         tnewz = tnegi;
+         sdxnewz = LLVMBuildXor(builder, signrdx, sdxi, "");
+         tdxnewz = tdxnegi;
+         sdynewz = LLVMBuildXor(builder, signrdy, sdyi, "");
+         tdynewz = tdynegi;
+
+         /* select/mirror */
+         face_s = lp_build_select(cint_bld, as_ge_at, snewx, snewy);
+         face_t = lp_build_select(cint_bld, as_ge_at, tnewx, tnewy);
+         face = lp_build_select(cint_bld, as_ge_at, facex, facey);
+         face_sdx = lp_build_select(cint_bld, as_ge_at, sdxnewx, sdxnewy);
+         face_tdx = lp_build_select(cint_bld, as_ge_at, tdxnewx, tdxnewy);
+         face_sdy = lp_build_select(cint_bld, as_ge_at, sdynewx, sdynewy);
+         face_tdy = lp_build_select(cint_bld, as_ge_at, tdynewx, tdynewy);
+
+         face_s = lp_build_select(cint_bld, ar_ge_as_at, snewz, face_s);
+         face_t = lp_build_select(cint_bld, ar_ge_as_at, tnewz, face_t);
+         face = lp_build_select(cint_bld, ar_ge_as_at, facez, face);
+         face_sdx = lp_build_select(cint_bld, ar_ge_as_at, sdxnewz, face_sdx);
+         face_tdx = lp_build_select(cint_bld, ar_ge_as_at, tdxnewz, face_tdx);
+         face_sdy = lp_build_select(cint_bld, ar_ge_as_at, sdynewz, face_sdy);
+         face_tdy = lp_build_select(cint_bld, ar_ge_as_at, tdynewz, face_tdy);
+
+         face_s = LLVMBuildBitCast(builder, face_s,
+                                  lp_build_vec_type(gallivm, coord_bld->type), "");
+         face_t = LLVMBuildBitCast(builder, face_t,
+                                  lp_build_vec_type(gallivm, coord_bld->type), "");
+         face_sdx = LLVMBuildBitCast(builder, face_sdx,
+                                     lp_build_vec_type(gallivm, coord_bld->type), "");
+         face_tdx = LLVMBuildBitCast(builder, face_tdx,
+                                     lp_build_vec_type(gallivm, coord_bld->type), "");
+         face_sdy = LLVMBuildBitCast(builder, face_sdy,
+                                     lp_build_vec_type(gallivm, coord_bld->type), "");
+         face_tdy = LLVMBuildBitCast(builder, face_tdy,
+                                     lp_build_vec_type(gallivm, coord_bld->type), "");
+
+         /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
+         madxdivma = lp_build_mul(coord_bld, madx, imapos);
+         tmp = lp_build_mul(coord_bld, madxdivma, face_s);
+         tmp = lp_build_sub(coord_bld, face_sdx, tmp);
+         derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalfpos);
+
+         /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
+         tmp = lp_build_mul(coord_bld, madxdivma, face_t);
+         tmp = lp_build_sub(coord_bld, face_tdx, tmp);
+         derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalfpos);
+
+         /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
+         madydivma = lp_build_mul(coord_bld, mady, imapos);
+         tmp = lp_build_mul(coord_bld, madydivma, face_s);
+         tmp = lp_build_sub(coord_bld, face_sdy, tmp);
+         derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalfpos);
+
+         /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
+         tmp = lp_build_mul(coord_bld, madydivma, face_t);
+         tmp = lp_build_sub(coord_bld, face_tdy, tmp);
+         derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalfpos);
+
+         mai = LLVMBuildBitCast(builder, ma, lp_build_vec_type(gallivm, intctype), "");
+         signma = LLVMBuildLShr(builder, mai, signshift, "");
+         coords[2] = LLVMBuildOr(builder, face, signma, "face");
+
+         /* project coords */
+         face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
+         face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
+
+         coords[0] = lp_build_add(coord_bld, face_s, posHalf);
+         coords[1] = lp_build_add(coord_bld, face_t, posHalf);
+
+         return;
+      }
+
+      else if (need_derivs) {
          LLVMValueRef ddx_ddy[2], tmp[3], rho_vec;
          static const unsigned char swizzle0[] = { /* no-op swizzle */
             0, LP_BLD_SWIZZLE_DONTCARE,
@@ -1590,10 +1777,10 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
           */
          ma = lp_build_select(coord_bld, as_ge_at, s, t);
          ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
-         ima = lp_build_cube_imapos(coord_bld, ma);
-         s = lp_build_mul(coord_bld, s, ima);
-         t = lp_build_mul(coord_bld, t, ima);
-         r = lp_build_mul(coord_bld, r, ima);
+         imahalfpos = lp_build_cube_imapos(coord_bld, ma);
+         s = lp_build_mul(coord_bld, s, imahalfpos);
+         t = lp_build_mul(coord_bld, t, imahalfpos);
+         r = lp_build_mul(coord_bld, r, imahalfpos);
 
          /*
           * This isn't quite the same as the "ordinary" (3d deriv) path since we
@@ -1651,14 +1838,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
       snewz = LLVMBuildXor(builder, signr, si, "");
       tnewz = tnegi;
 
-      /* XXX on x86 unclear if we should cast the values back to float
-       * or not - on some cpus (nehalem) pblendvb has twice the throughput
-       * of blendvps though on others there just might be domain
-       * transition penalties when using it (this depends on what llvm
-       * will chose for the bit ops above so there appears no "right way",
-       * but given the boatload of selects let's just use the int type).
-       */
-
       /* select/mirror */
       if (!need_derivs) {
          ma = lp_build_select(coord_bld, as_ge_at, s, t);
@@ -1690,9 +1869,9 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
 
       /* project coords */
       if (!need_derivs) {
-         ima = lp_build_cube_imapos(coord_bld, ma);
-         face_s = lp_build_mul(coord_bld, face_s, ima);
-         face_t = lp_build_mul(coord_bld, face_t, ima);
+         imahalfpos = lp_build_cube_imapos(coord_bld, ma);
+         face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
+         face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
       }
 
       coords[0] = lp_build_add(coord_bld, face_s, posHalf);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 803a99e..70f0350 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -457,8 +457,9 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
 void
 lp_build_cube_lookup(struct lp_build_sample_context *bld,
                      LLVMValueRef *coords,
-                     const struct lp_derivatives *derivs, /* optional */
+                     const struct lp_derivatives *derivs_in, /* optional */
                      LLVMValueRef *rho,
+                     struct lp_derivatives *derivs_out, /* optional */
                      boolean need_derivs);
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 33378bc..54dee25 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1387,6 +1387,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
    const unsigned target = bld->static_texture_state->target;
    LLVMValueRef first_level, cube_rho = NULL;
    LLVMValueRef lod_ipart = NULL;
+   struct lp_derivatives cube_derivs;
 
    /*
    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
@@ -1403,7 +1404,8 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                       mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
                       !bld->static_sampler_state->min_max_lod_equal &&
                       !explicit_lod);
-      lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
+      lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
+      derivs = &cube_derivs;
    }
    else if (target == PIPE_TEXTURE_1D_ARRAY ||
             target == PIPE_TEXTURE_2D_ARRAY) {
@@ -2163,9 +2165,24 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
     * avoided like min and max lod being equal.
     */
    bld.num_mips = bld.num_lods = 1;
-   if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
-       (explicit_lod || lod_bias ||
-        (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+
+   if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
+       (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
+       (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
+       (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+      /*
+       * special case for using per-pixel lod even for implicit lod,
+       * which is generally never required (ok by APIs) except to please
+       * some (somewhat broken imho) tests (because per-pixel face selection
+       * can cause derivatives to be different for pixels outside the primitive
+       * due to the major axis division even if pre-project derivatives are
+       * looking normal).
+       */
+      bld.num_mips = type.length;
+      bld.num_lods = type.length;
+   }
+   else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
+       (explicit_lod || lod_bias || derivs)) {
       if ((is_fetch && target != PIPE_BUFFER) ||
           (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
          bld.num_mips = type.length;
@@ -2371,9 +2388,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          bld4.texel_type.length = 4;
 
          bld4.num_mips = bld4.num_lods = 1;
+         if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
+             (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
+             (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
+             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+            bld4.num_mips = type4.length;
+            bld4.num_lods = type4.length;
+         }
          if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
-             (explicit_lod || lod_bias ||
-              (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+             (explicit_lod || lod_bias || derivs)) {
             if ((is_fetch && target != PIPE_BUFFER) ||
                 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
                bld4.num_mips = type4.length;
-- 
1.7.9.5


More information about the mesa-dev mailing list