[Mesa-dev] [PATCH] gallivm: fix min/mag switchover point for nearest/none mip filter

Thu Aug 22 19:34:03 PDT 2013

From: Roland Scheidegger <sroland at vmware.com>

Previously, the min/mag switchover point when using nearest/none mip
filter was effectively -0.5 which can't be right. Looks like new OpenGL
thinks it's ok if it's always 0.0 (older versions required 0.5 in some
cases), let's hope everybody else thinks that's fine too.
Refactor this slightly and get the per-quad/per-pixel min/mag decision
values further down to sampling, though still only the first component
is used yet.
While here also fix code trying to skip lod bias application etc. when
mipfilter is none, as this is still needed for determining min/mag filter.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c     |   48 ++++++++++----
 src/gallium/auxiliary/gallivm/lp_bld_sample.h     |    3 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |   25 +++-----
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h |    2 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   69 ++++++++++-----------
 5 files changed, 81 insertions(+), 66 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 9554104..89d7249 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -162,7 +162,8 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
       state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
    }
 
-   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
+       state->min_img_filter != state->mag_img_filter) {
       if (sampler->lod_bias != 0.0f) {
          state->lod_bias_non_zero = 1;
       }
@@ -669,9 +670,10 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
  * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
  * \param lod_bias  optional float vector with the shader lod bias
  * \param explicit_lod  optional float vector with the explicit lod
- * \param width  scalar int texture width
- * \param height  scalar int texture height
- * \param depth  scalar int texture depth
+ * \param cube_rho  rho calculated by cube coord mapping (optional)
+ * \param out_lod_ipart  integer part of lod
+ * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
+ * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
  *
  * The resulting lod is scalar per quad, so only the first value per quad
  * passed in from lod_bias, explicit_lod is used.
@@ -689,7 +691,8 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef explicit_lod, /* optional */
                       unsigned mip_filter,
                       LLVMValueRef *out_lod_ipart,
-                      LLVMValueRef *out_lod_fpart)
+                      LLVMValueRef *out_lod_fpart,
+                      LLVMValueRef *out_lod_positive)
 
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
@@ -697,8 +700,27 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
    LLVMValueRef lod;
 
    *out_lod_ipart = bld->leveli_bld.zero;
+   *out_lod_positive = bld->leveli_bld.zero;
    *out_lod_fpart = levelf_bld->zero;
 
+   /*
+    * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
+    * "Implementations may either unconditionally assume c = 0 for the minification
+    * vs. magnification switch-over point, or may choose to make c depend on the
+    * combination of minification and magnification modes as follows: if the
+    * magnification filter is given by LINEAR and the minification filter is given
+    * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is
+    * done to ensure that a minified texture does not appear "sharper" than a
+    * magnified texture. Otherwise c = 0."
+    * And 3.9.11 Texture Minification:
+    * "If lod is less than or equal to the constant c (see section 3.9.12) the
+    * texture is said to be magnified; if it is greater, the texture is minified."
+    * So, using 0 as switchover point always, and using magnification for lod == 0.
+    * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec),
+    * old GL versions required 0.5 for the modes listed above.
+    * I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
+    */
+
    if (bld->static_sampler_state->min_max_lod_equal) {
       /* User is forcing sampling from a particular mipmap level.
        * This is hit during mipmap generation.
@@ -739,21 +761,20 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
             if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
                /*
-                * FIXME: this is not entirely correct, as out_lod_ipart is used
-                * both for mip level determination as well as mag/min switchover
-                * point (if different min/mag filters are used). In particular,
-                * lod values between [-0.5,0] (rho between [sqrt(2), 1.0]) will
-                * incorrectly use min filter instead of mag (the non-optimized
-                * calculation further down has exactly the same problem).
+                * Don't actually need both all the time, ipart is needed
+                * for nearest mipfilter, pos_or_zero if min != mag.
                 */
                *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
-               *out_lod_fpart = levelf_bld->zero;
+               *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
+                                                rho, levelf_bld->one);
                return;
             }
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                 !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
                lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
                                       out_lod_ipart, out_lod_fpart);
+               *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
+                                                rho, levelf_bld->one);
                return;
             }
          }
@@ -803,6 +824,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
       }
    }
 
+   *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
+                                    lod, levelf_bld->zero);
+
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
          lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 067a995..a7ebe7e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -388,7 +388,8 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef explicit_lod, /* optional */
                       unsigned mip_filter,
                       LLVMValueRef *out_lod_ipart,
-                      LLVMValueRef *out_lod_fpart);
+                      LLVMValueRef *out_lod_fpart,
+                      LLVMValueRef *out_lod_positive);
 
 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index b9227b5..7431388 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -1562,13 +1562,12 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     const LLVMValueRef *offsets,
-                    LLVMValueRef lod_ipart,
+                    LLVMValueRef lod_positive,
                     LLVMValueRef lod_fpart,
                     LLVMValueRef ilevel0,
                     LLVMValueRef ilevel1,
                     LLVMValueRef texel_out[4])
 {
-   struct lp_build_context *int_bld = &bld->int_bld;
    LLVMBuilderRef builder = bld->gallivm->builder;
    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
@@ -1608,26 +1607,20 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
        * depending on the lod being > 0 or <= 0, respectively.
        */
       struct lp_build_if_state if_ctx;
-      LLVMValueRef minify;
 
       /*
-       * XXX this should take all lods into account, if some are min
-       * some max probably could hack up the coords/weights in the linear
+       * FIXME this should take all lods into account, if some are min
+       * some max probably could hack up the weights in the linear
        * path with selects to work for nearest.
-       * If that's just two quads sitting next to each other it seems
-       * quite ok to do the same filtering method on both though, at
-       * least unless we have explicit lod (and who uses different
-       * min/mag filter with that?)
        */
-      if (bld->num_lods > 1)
-         lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
-                                              lp_build_const_int32(bld->gallivm, 0), "");
+      if (bld->leveli_bld.type.length > 1)
+         lod_positive = LLVMBuildExtractElement(builder, lod_positive,
+                                                lp_build_const_int32(bld->gallivm, 0), "");
 
-      /* minify = lod >= 0.0 */
-      minify = LLVMBuildICmp(builder, LLVMIntSGE,
-                             lod_ipart, int_bld->zero, "");
+      lod_positive = LLVMBuildTrunc(builder, lod_positive,
+                                    LLVMInt1TypeInContext(bld->gallivm->context), "");
 
-      lp_build_if(&if_ctx, bld->gallivm, minify);
+      lp_build_if(&if_ctx, bld->gallivm, lod_positive);
       {
          /* Use the minification filter */
          lp_build_sample_mipmap(bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
index 6fce971..da503d2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
@@ -47,7 +47,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     const LLVMValueRef *offsets,
-                    LLVMValueRef lod_ipart,
+                    LLVMValueRef lod_positive,
                     LLVMValueRef lod_fpart,
                     LLVMValueRef ilevel0,
                     LLVMValueRef ilevel1,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 743dd0a..8ad3b9f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1239,7 +1239,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                        const struct lp_derivatives *derivs, /* optional */
                        LLVMValueRef lod_bias, /* optional */
                        LLVMValueRef explicit_lod, /* optional */
-                       LLVMValueRef *lod_ipart,
+                       LLVMValueRef *lod_pos_or_zero,
                        LLVMValueRef *lod_fpart,
                        LLVMValueRef *ilevel0,
                        LLVMValueRef *ilevel1)
@@ -1249,6 +1249,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
    const unsigned target = bld->static_texture_state->target;
    LLVMValueRef first_level, cube_rho = NULL;
+   LLVMValueRef lod_ipart = NULL;
 
    /*
    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
@@ -1309,9 +1310,10 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                             coords[0], coords[1], coords[2], cube_rho,
                             derivs, lod_bias, explicit_lod,
                             mip_filter,
-                            lod_ipart, lod_fpart);
+                            &lod_ipart, lod_fpart, lod_pos_or_zero);
    } else {
-      *lod_ipart = bld->leveli_bld.zero;
+      lod_ipart = bld->leveli_bld.zero;
+      *lod_pos_or_zero = bld->leveli_bld.zero;
    }
 
    /*
@@ -1328,8 +1330,8 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
           * We should be able to set ilevel0 = const(0) but that causes
           * bad x86 code to be emitted.
           */
-         assert(*lod_ipart);
-         lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
+         assert(lod_ipart);
+         lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
       }
       else {
          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
@@ -1339,14 +1341,14 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
       }
       break;
    case PIPE_TEX_MIPFILTER_NEAREST:
-      assert(*lod_ipart);
-      lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
+      assert(lod_ipart);
+      lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
       break;
    case PIPE_TEX_MIPFILTER_LINEAR:
-      assert(*lod_ipart);
+      assert(lod_ipart);
       assert(*lod_fpart);
       lp_build_linear_mip_levels(bld, texture_index,
-                                 *lod_ipart, lod_fpart,
+                                 lod_ipart, lod_fpart,
                                  ilevel0, ilevel1);
       break;
    }
@@ -1581,13 +1583,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                         unsigned sampler_unit,
                         LLVMValueRef *coords,
                         const LLVMValueRef *offsets,
-                        LLVMValueRef lod_ipart,
+                        LLVMValueRef lod_positive,
                         LLVMValueRef lod_fpart,
                         LLVMValueRef ilevel0,
                         LLVMValueRef ilevel1,
                         LLVMValueRef *colors_out)
 {
-   struct lp_build_context *int_bld = &bld->int_bld;
    LLVMBuilderRef builder = bld->gallivm->builder;
    const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
    const unsigned mip_filter = sampler_state->min_mip_filter;
@@ -1634,26 +1635,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
        * depending on the lod being > 0 or <= 0, respectively.
        */
       struct lp_build_if_state if_ctx;
-      LLVMValueRef minify;
 
       /*
-       * XXX this should take all lods into account, if some are min
-       * some max probably could hack up the coords/weights in the linear
+       * FIXME this should take all lods into account, if some are min
+       * some max probably could hack up the weights in the linear
        * path with selects to work for nearest.
-       * If that's just two quads sitting next to each other it seems
-       * quite ok to do the same filtering method on both though, at
-       * least unless we have explicit lod (and who uses different
-       * min/mag filter with that?)
        */
-      if (bld->num_lods > 1)
-         lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
-                                             lp_build_const_int32(bld->gallivm, 0), "");
+      if (bld->leveli_bld.type.length > 1)
+         lod_positive = LLVMBuildExtractElement(builder, lod_positive,
+                                                lp_build_const_int32(bld->gallivm, 0), "");
 
-      /* minify = lod >= 0.0 */
-      minify = LLVMBuildICmp(builder, LLVMIntSGE,
-                             lod_ipart, int_bld->zero, "");
+      lod_positive = LLVMBuildTrunc(builder, lod_positive,
+                                    LLVMInt1TypeInContext(bld->gallivm->context), "");
 
-      lp_build_if(&if_ctx, bld->gallivm, minify);
+      lp_build_if(&if_ctx, bld->gallivm, lod_positive);
       {
          /* Use the minification filter */
          lp_build_sample_mipmap(bld, sampler_unit,
@@ -1941,6 +1936,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    /*
     * There are other situations where at least the multiple int lods could be
     * avoided like min and max lod being equal.
+    * XXX if num_lods == 1 (for multiple quads) the level bld contexts will still
+    * have length 4. Because lod_selector is always using per quad calcs in this
+    * case, but minification etc. don't need to bother. This is very brittle though
+    * e.g. num_lods might be 1 but still have multiple positive_lod values!
     */
    if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
        (explicit_lod || lod_bias ||
@@ -2034,7 +2033,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    }
 
    else {
-      LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
+      LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
       boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
                         /* not sure this is strictly needed or simply impossible */
@@ -2063,7 +2062,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
       lp_build_sample_common(&bld, texture_index, sampler_index,
                              newcoords,
                              derivs, lod_bias, explicit_lod,
-                             &lod_ipart, &lod_fpart,
+                             &lod_positive, &lod_fpart,
                              &ilevel0, &ilevel1);
 
       /*
@@ -2077,10 +2076,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
             if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
                LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
                /*
-                * These parameters are the same for all quads,
-                * could probably simplify.
+                * This parameter is the same for all quads could probably simplify.
                 */
-               lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
                ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
             }
          }
@@ -2089,7 +2086,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
             lp_build_sample_aos(&bld, sampler_index,
                                 newcoords[0], newcoords[1],
                                 newcoords[2],
-                                offsets, lod_ipart, lod_fpart,
+                                offsets, lod_positive, lod_fpart,
                                 ilevel0, ilevel1,
                                 texel_out);
          }
@@ -2097,7 +2094,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          else {
             lp_build_sample_general(&bld, sampler_index,
                                     newcoords, offsets,
-                                    lod_ipart, lod_fpart,
+                                    lod_positive, lod_fpart,
                                     ilevel0, ilevel1,
                                     texel_out);
          }
@@ -2180,7 +2177,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
 
          for (i = 0; i < num_quads; i++) {
             LLVMValueRef s4, t4, r4;
-            LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
+            LLVMValueRef lod_positive4, lod_fpart4 = NULL;
             LLVMValueRef ilevel04, ilevel14 = NULL;
             LLVMValueRef offsets4[4] = { NULL };
             unsigned num_lods = bld4.num_lods;
@@ -2198,7 +2195,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                   }
                }
             }
-            lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
+            lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
             ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
@@ -2209,7 +2206,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                /* do sampling/filtering with fixed pt arithmetic */
                lp_build_sample_aos(&bld4, sampler_index,
                                    s4, t4, r4, offsets4,
-                                   lod_ipart4, lod_fpart4,
+                                   lod_positive4, lod_fpart4,
                                    ilevel04, ilevel14,
                                    texelout4);
             }
@@ -2225,7 +2222,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
 
                lp_build_sample_general(&bld4, sampler_index,
                                        newcoords4, offsets4,
-                                       lod_ipart4, lod_fpart4,
+                                       lod_positive4, lod_fpart4,
                                        ilevel04, ilevel14,
                                        texelout4);
             }
-- 
1.7.9.5