[Mesa-dev] [PATCH] gallivm: do per-pixel lod calculations for explicit lod

sroland at vmware.com sroland at vmware.com
Thu Jun 27 16:50:32 PDT 2013


From: Roland Scheidegger <sroland at vmware.com>

d3d10 requires per-pixel lod calculations for explicit lod, lod bias and
explicit derivatives, and we should probably do it for OpenGL too - at least
if they are used from vertex or geometry shaders (so doesn't apply to lod
bias) this doesn't just affect neighboring pixels.
Some code was already there to handle this so fix it up and enable it.
There will no doubt be a performance hit unfortunately, we could do better
if we'd knew we had a real vector shift instruction (with variable shift
count) but this requires AVX2 on x86 (or a AMD Bulldozer family cpu).
Don't do anything for lod bias and explicit derivatives yet, though
no special magic should be needed for them neither.
Likewise, the size query is still broken just the same.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  110 +++++++++---------
 src/gallium/auxiliary/gallivm/lp_bld_sample.h     |   12 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |   26 ++---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  128 +++++++++++++--------
 4 files changed, 155 insertions(+), 121 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index d689c7b..c2efec9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -215,7 +215,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
    struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
    struct lp_build_context *float_bld = &bld->float_bld;
    struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
+   struct lp_build_context *levelf_bld = &bld->levelf_bld;
    const unsigned dims = bld->dims;
    LLVMValueRef ddx_ddy[2];
    LLVMBuilderRef builder = bld->gallivm->builder;
@@ -235,6 +235,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
 
    /* Note that all simplified calculations will only work for isotropic filtering */
 
+   assert(bld->num_lods != length);
+
    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                  bld->gallivm, texture_unit);
    first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
@@ -248,14 +250,14 @@ lp_build_rho(struct lp_build_sample_context *bld,
        * Cube map code did already everything except size mul and per-quad extraction.
        */
       rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                      perquadf_bld->type, cube_rho, 0);
+                                      levelf_bld->type, cube_rho, 0);
       if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
-         rho = lp_build_sqrt(perquadf_bld, rho);
+         rho = lp_build_sqrt(levelf_bld, rho);
       }
       /* Could optimize this for single quad just skip the broadcast */
       cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
-                                            perquadf_bld->type, float_size, index0);
-      rho = lp_build_mul(perquadf_bld, cubesize, rho);
+                                            levelf_bld->type, float_size, index0);
+      rho = lp_build_mul(levelf_bld, cubesize, rho);
    }
    else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
       LLVMValueRef ddmax[3], ddx[3], ddy[3];
@@ -289,12 +291,12 @@ lp_build_rho(struct lp_build_sample_context *bld,
          }
          rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                         perquadf_bld->type, rho_vec, 0);
+                                         levelf_bld->type, rho_vec, 0);
          /*
           * note that as long as we don't care about per-pixel lod could reduce math
           * more (at some shuffle cost), but for now only do sqrt after packing.
           */
-         rho = lp_build_sqrt(perquadf_bld, rho);
+         rho = lp_build_sqrt(levelf_bld, rho);
       }
       else {
          rho_vec = ddmax[0];
@@ -309,7 +311,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
           * since we can't handle per-pixel rho/lod from now on (TODO).
           */
          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                         perquadf_bld->type, rho_vec, 0);
+                                         levelf_bld->type, rho_vec, 0);
       }
    }
    else {
@@ -381,8 +383,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
          rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
 
          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                         perquadf_bld->type, rho_vec, 0);
-         rho = lp_build_sqrt(perquadf_bld, rho);
+                                         levelf_bld->type, rho_vec, 0);
+         rho = lp_build_sqrt(levelf_bld, rho);
       }
       else {
          ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
@@ -462,7 +464,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
                }
             }
             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                            perquadf_bld->type, rho, 0);
+                                            levelf_bld->type, rho, 0);
          }
          else {
             if (dims <= 1) {
@@ -652,11 +654,11 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
-   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
+   struct lp_build_context *levelf_bld = &bld->levelf_bld;
    LLVMValueRef lod;
 
-   *out_lod_ipart = bld->perquadi_bld.zero;
-   *out_lod_fpart = perquadf_bld->zero;
+   *out_lod_ipart = bld->leveli_bld.zero;
+   *out_lod_fpart = levelf_bld->zero;
 
    if (bld->static_sampler_state->min_max_lod_equal) {
       /* User is forcing sampling from a particular mipmap level.
@@ -666,12 +668,15 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          bld->dynamic_state->min_lod(bld->dynamic_state,
                                      bld->gallivm, sampler_unit);
 
-      lod = lp_build_broadcast_scalar(perquadf_bld, min_lod);
+      lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
    }
    else {
       if (explicit_lod) {
-         lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                                         perquadf_bld->type, explicit_lod, 0);
+         if (bld->num_lods != bld->coord_type.length)
+            lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
+                                            levelf_bld->type, explicit_lod, 0);
+         else
+            lod = explicit_lod;
       }
       else {
          LLVMValueRef rho;
@@ -694,29 +699,29 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 
             if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
-               *out_lod_ipart = lp_build_ilog2(perquadf_bld, rho);
-               *out_lod_fpart = perquadf_bld->zero;
+               *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
+               *out_lod_fpart = levelf_bld->zero;
                return;
             }
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                 !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
-               lp_build_brilinear_rho(perquadf_bld, rho, BRILINEAR_FACTOR,
+               lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
                                       out_lod_ipart, out_lod_fpart);
                return;
             }
          }
 
          if (0) {
-            lod = lp_build_log2(perquadf_bld, rho);
+            lod = lp_build_log2(levelf_bld, rho);
          }
          else {
-            lod = lp_build_fast_log2(perquadf_bld, rho);
+            lod = lp_build_fast_log2(levelf_bld, rho);
          }
 
          /* add shader lod bias */
          if (lod_bias) {
             lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                  perquadf_bld->type, lod_bias, 0);
+                  levelf_bld->type, lod_bias, 0);
             lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
          }
       }
@@ -726,7 +731,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          LLVMValueRef sampler_lod_bias =
             bld->dynamic_state->lod_bias(bld->dynamic_state,
                                          bld->gallivm, sampler_unit);
-         sampler_lod_bias = lp_build_broadcast_scalar(perquadf_bld,
+         sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
                                                       sampler_lod_bias);
          lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
       }
@@ -736,33 +741,33 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          LLVMValueRef max_lod =
             bld->dynamic_state->max_lod(bld->dynamic_state,
                                         bld->gallivm, sampler_unit);
-         max_lod = lp_build_broadcast_scalar(perquadf_bld, max_lod);
+         max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
 
-         lod = lp_build_min(perquadf_bld, lod, max_lod);
+         lod = lp_build_min(levelf_bld, lod, max_lod);
       }
       if (bld->static_sampler_state->apply_min_lod) {
          LLVMValueRef min_lod =
             bld->dynamic_state->min_lod(bld->dynamic_state,
                                         bld->gallivm, sampler_unit);
-         min_lod = lp_build_broadcast_scalar(perquadf_bld, min_lod);
+         min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
 
-         lod = lp_build_max(perquadf_bld, lod, min_lod);
+         lod = lp_build_max(levelf_bld, lod, min_lod);
       }
    }
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
-         lp_build_brilinear_lod(perquadf_bld, lod, BRILINEAR_FACTOR,
+         lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
                                 out_lod_ipart, out_lod_fpart);
       }
       else {
-         lp_build_ifloor_fract(perquadf_bld, lod, out_lod_ipart, out_lod_fpart);
+         lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart, out_lod_fpart);
       }
 
       lp_build_name(*out_lod_fpart, "lod_fpart");
    }
    else {
-      *out_lod_ipart = lp_build_iround(perquadf_bld, lod);
+      *out_lod_ipart = lp_build_iround(levelf_bld, lod);
    }
 
    lp_build_name(*out_lod_ipart, "lod_ipart");
@@ -784,20 +789,20 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                            LLVMValueRef lod_ipart,
                            LLVMValueRef *level_out)
 {
-   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
+   struct lp_build_context *leveli_bld = &bld->leveli_bld;
    LLVMValueRef first_level, last_level, level;
 
    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                  bld->gallivm, texture_unit);
    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                                bld->gallivm, texture_unit);
-   first_level = lp_build_broadcast_scalar(perquadi_bld, first_level);
-   last_level = lp_build_broadcast_scalar(perquadi_bld, last_level);
+   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
+   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
 
-   level = lp_build_add(perquadi_bld, lod_ipart, first_level);
+   level = lp_build_add(leveli_bld, lod_ipart, first_level);
 
    /* clamp level to legal range of levels */
-   *level_out = lp_build_clamp(perquadi_bld, level, first_level, last_level);
+   *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
 }
 
 
@@ -815,8 +820,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                            LLVMValueRef *level1_out)
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
-   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
-   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
+   struct lp_build_context *leveli_bld = &bld->leveli_bld;
+   struct lp_build_context *levelf_bld = &bld->levelf_bld;
    LLVMValueRef first_level, last_level;
    LLVMValueRef clamp_min;
    LLVMValueRef clamp_max;
@@ -825,11 +830,11 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                                  bld->gallivm, texture_unit);
    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                                bld->gallivm, texture_unit);
-   first_level = lp_build_broadcast_scalar(perquadi_bld, first_level);
-   last_level = lp_build_broadcast_scalar(perquadi_bld, last_level);
+   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
+   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
 
-   *level0_out = lp_build_add(perquadi_bld, lod_ipart, first_level);
-   *level1_out = lp_build_add(perquadi_bld, *level0_out, perquadi_bld->one);
+   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
+   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);
 
    /*
     * Clamp both *level0_out and *level1_out to [first_level, last_level], with
@@ -843,7 +848,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
     * converting to our lp_bld_logic helpers.
     */
 #if HAVE_LLVM < 0x0301
-   assert(perquadi_bld->type.length == 1);
+   assert(leveli_bld->type.length == 1);
 #endif
 
    /* *level0_out < first_level */
@@ -858,7 +863,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                  first_level, *level1_out, "");
 
    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
-                                      perquadf_bld->zero, *lod_fpart_inout, "");
+                                      levelf_bld->zero, *lod_fpart_inout, "");
 
    /* *level0_out >= last_level */
    clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
@@ -872,7 +877,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                  last_level, *level1_out, "");
 
    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
-                                      perquadf_bld->zero, *lod_fpart_inout, "");
+                                      levelf_bld->zero, *lod_fpart_inout, "");
 
    lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
    lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
@@ -1087,7 +1092,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
             LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
 
             ileveli = lp_build_extract_broadcast(bld->gallivm,
-                                                 bld->perquadi_bld.type,
+                                                 bld->leveli_bld.type,
                                                  bld4.type,
                                                  ilevel,
                                                  indexi);
@@ -1131,10 +1136,9 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                tmp[i] = bld->int_size;
                tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1);
             }
-            int_size_vec = lp_build_concat(bld->gallivm,
-                                           tmp,
-                                           bld->int_size_in_bld.type,
-                                           bld->num_lods);
+            *out_size = lp_build_concat(bld->gallivm, tmp,
+                                        bld->int_size_in_bld.type,
+                                        bld->num_lods);
          }
       }
    }
@@ -1218,10 +1222,10 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
          *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                 coord_type, size, 0);
          if (dims >= 2) {
-            *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
-                                                   coord_type, size, 1);
+            *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+                                                    coord_type, size, 1);
             if (dims == 3) {
-               *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+               *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                       coord_type, size, 2);
             }
          }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index cde8ce9..126b185 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -268,13 +268,13 @@ struct lp_build_sample_context
    struct lp_type texel_type;
    struct lp_build_context texel_bld;
 
-   /** Float per-quad type */
-   struct lp_type perquadf_type;
-   struct lp_build_context perquadf_bld;
+   /** Float level type */
+   struct lp_type levelf_type;
+   struct lp_build_context levelf_bld;
 
-   /** Int per-quad type */
-   struct lp_type perquadi_type;
-   struct lp_build_context perquadi_bld;
+   /** Int level type */
+   struct lp_type leveli_type;
+   struct lp_build_context leveli_bld;
 
    /* Common dynamic state values */
    LLVMValueRef row_stride_array;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 104c24d..da416aa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
-                                                     bld->perquadf_bld.type, 256.0);
-      LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type);
+                                                     bld->levelf_bld.type, 256.0);
+      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
       struct lp_build_if_state if_ctx;
       LLVMValueRef need_lerp;
       unsigned num_quads = bld->coord_bld.type.length / 4;
@@ -1433,9 +1433,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
 
       /* need_lerp = lod_fpart > 0 */
-      if (num_quads == 1) {
+      if (bld->num_lods == 1) {
          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
-                                   lod_fpart, bld->perquadi_bld.zero,
+                                   lod_fpart, bld->leveli_bld.zero,
                                    "need_lerp");
       }
       else {
@@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           * lod_fpart values have same sign.
           * We can however then skip the greater than comparison.
           */
-         lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
-                                  bld->perquadi_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart);
+         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
+                                  bld->leveli_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
       }
 
       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1465,9 +1465,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          lp_build_mipmap_level_sizes(bld, ilevel1,
                                      &size1,
                                      &row_stride1_vec, &img_stride1_vec);
-         lp_build_mipmap_level_sizes(bld, ilevel1,
-                                     &size1,
-                                     &row_stride1_vec, &img_stride1_vec);
          if (bld->num_lods == 1) {
             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
          }
@@ -1511,7 +1508,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
          /* interpolate samples from the two mipmap levels */
 
-         if (num_quads == 1) {
+         if (num_quads == 1 && bld->num_lods == 1) {
             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
             lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
 
@@ -1526,17 +1523,16 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 #endif
          }
          else {
-            const unsigned num_chans_per_quad = 4 * 4;
-            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->perquadi_bld.type.length);
+            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
+            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
             LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
 
             /* Take the LSB of lod_fpart */
             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
 
             /* Broadcast each lod weight into their respective channels */
-            assert(u8n_bld.type.length == num_quads * num_chans_per_quad);
             for (i = 0; i < u8n_bld.type.length; ++i) {
-               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_quad);
+               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
             }
             lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                                LLVMConstVector(shuffle, u8n_bld.type.length), "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index e0a59d0..06b672c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -979,17 +979,17 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       struct lp_build_if_state if_ctx;
       LLVMValueRef need_lerp;
-      unsigned num_quads = bld->coord_bld.type.length / 4;
 
       /* need_lerp = lod_fpart > 0 */
-      if (num_quads == 1) {
+      if (bld->num_lods == 1) {
          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
-                                   lod_fpart, bld->perquadf_bld.zero,
+                                   lod_fpart, bld->levelf_bld.zero,
                                    "need_lerp");
       }
       else {
          /*
-          * We'll do mip filtering if any of the quads need it.
+          * We'll do mip filtering if any of the quads (or individual
+          * pixel in case of per-pixel lod) need it.
           * It might be better to split the vectors here and only fetch/filter
           * quads which need it.
           */
@@ -998,13 +998,13 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           * negative values which would screw up filtering if not all
           * lod_fpart values have same sign.
           */
-         lod_fpart = lp_build_max(&bld->perquadf_bld, lod_fpart,
-                                  bld->perquadf_bld.zero);
-         need_lerp = lp_build_compare(bld->gallivm, bld->perquadf_bld.type,
+         lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
+                                  bld->levelf_bld.zero);
+         need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
                                       PIPE_FUNC_GREATER,
-                                      lod_fpart, bld->perquadf_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, need_lerp);
-     }
+                                      lod_fpart, bld->levelf_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
+      }
 
       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
       {
@@ -1036,10 +1036,11 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
          /* interpolate samples from the two mipmap levels */
 
-         lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
-                                                           bld->perquadf_bld.type,
-                                                           bld->texel_bld.type,
-                                                           lod_fpart);
+         if (bld->num_lods != bld->coord_type.length)
+            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
+                                                              bld->levelf_bld.type,
+                                                              bld->texel_bld.type,
+                                                              lod_fpart);
 
          for (chan = 0; chan < 4; chan++) {
             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
@@ -1143,7 +1144,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                             mip_filter,
                             lod_ipart, lod_fpart);
    } else {
-      *lod_ipart = bld->perquadi_bld.zero;
+      *lod_ipart = bld->leveli_bld.zero;
    }
 
    /*
@@ -1166,7 +1167,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
       else {
          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                        bld->gallivm, texture_index);
-         first_level = lp_build_broadcast_scalar(&bld->perquadi_bld, first_level);
+         first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
          *ilevel0 = first_level;
       }
       break;
@@ -1295,7 +1296,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                      const LLVMValueRef *offsets,
                      LLVMValueRef *colors_out)
 {
-   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
+   struct lp_build_context *perquadi_bld = &bld->leveli_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    unsigned dims = bld->dims, chan;
    unsigned target = bld->static_texture_state->target;
@@ -1307,8 +1308,13 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
 
    /* XXX just like ordinary sampling, we don't handle per-pixel lod (yet). */
    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
-      ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
-                                         perquadi_bld->type, explicit_lod, 0);
+      if (bld->num_lods != int_coord_bld->type.length) {
+         ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
+                                            perquadi_bld->type, explicit_lod, 0);
+      }
+      else {
+         ilevel = explicit_lod;
+      }
       lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel);
    }
    else {
@@ -1529,10 +1535,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    bld.float_size_in_type.length = dims > 1 ? 4 : 1;
    bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
    bld.texel_type = type;
-   bld.perquadf_type = type;
-   /* we want native vector size to be able to use our intrinsics */
-   bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
-   bld.perquadi_type = lp_int_type(bld.perquadf_type);
 
    /* always using the first channel hopefully should be safe,
     * if not things WILL break in other places anyway.
@@ -1567,17 +1569,30 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
     * There are other situations where at least the multiple int lods could be
     * avoided like min and max lod being equal.
     */
-   if ((is_fetch && explicit_lod && bld.static_texture_state->target != PIPE_BUFFER) ||
-       (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+   if (explicit_lod &&
+       ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
+        (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
+      bld.num_lods = type.length;
+   else if (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE) {
       bld.num_lods = num_quads;
    }
    else {
       bld.num_lods = 1;
    }
 
+   bld.levelf_type = type;
+   /* we want native vector size to be able to use our intrinsics */
+   if (bld.num_lods != type.length) {
+      bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+   }
+   bld.leveli_type = lp_int_type(bld.levelf_type);
    bld.float_size_type = bld.float_size_in_type;
-   bld.float_size_type.length = bld.num_lods > 1 ? type.length :
-                                   bld.float_size_in_type.length;
+   /* the vectors here are GIGANTIC (up to 32xf32), rely on llvm there */
+   if (bld.num_lods > 1) {
+      bld.float_size_type.length = bld.num_lods == type.length ?
+                                      bld.num_lods * bld.float_size_in_type.length :
+                                      type.length;
+   }
    bld.int_size_type = lp_int_type(bld.float_size_type);
 
    lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
@@ -1590,8 +1605,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
    lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
-   lp_build_context_init(&bld.perquadf_bld, gallivm, bld.perquadf_type);
-   lp_build_context_init(&bld.perquadi_bld, gallivm, bld.perquadi_type);
+   lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
+   lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
 
    /* Get the dynamic state */
    tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
@@ -1735,14 +1750,32 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
          bld4.texel_type = bld.texel_type;
          bld4.texel_type.length = 4;
-         bld4.perquadf_type = type4;
+         bld4.levelf_type = type4;
          /* we want native vector size to be able to use our intrinsics */
-         bld4.perquadf_type.length = 1;
-         bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
+         bld4.levelf_type.length = 1;
+         bld4.leveli_type = lp_int_type(bld4.levelf_type);
+
+         if (explicit_lod &&
+             ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
+              (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
+            bld4.num_lods = type4.length;
+         else
+            bld4.num_lods = 1;
 
-         bld4.num_lods = 1;
-         bld4.int_size_type = bld4.int_size_in_type;
+         bld4.levelf_type = type4;
+         /* we want native vector size to be able to use our intrinsics */
+         if (bld4.num_lods != type4.length) {
+            bld4.levelf_type.length = 1;
+         }
+         bld4.leveli_type = lp_int_type(bld4.levelf_type);
          bld4.float_size_type = bld4.float_size_in_type;
+         /* the vectors here are GIGANTIC (up to 16xf32) */
+         if (bld4.num_lods > 1) {
+            bld4.float_size_type.length = bld4.num_lods == type4.length ?
+                                            bld4.num_lods * bld4.float_size_in_type.length :
+                                            type4.length;
+         }
+         bld4.int_size_type = lp_int_type(bld4.float_size_type);
 
          lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
          lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
@@ -1754,15 +1787,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
          lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
-         lp_build_context_init(&bld4.perquadf_bld, gallivm, bld4.perquadf_type);
-         lp_build_context_init(&bld4.perquadi_bld, gallivm, bld4.perquadi_type);
+         lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
+         lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
 
          for (i = 0; i < num_quads; i++) {
             LLVMValueRef s4, t4, r4;
-            LLVMValueRef lod_iparts, lod_fparts = NULL;
-            LLVMValueRef ilevel0s, ilevel1s = NULL;
-            LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+            LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
+            LLVMValueRef ilevel04, ilevel14 = NULL;
             LLVMValueRef offsets4[4] = { NULL };
+            unsigned num_lods = bld4.num_lods;
 
             s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
             t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
@@ -1777,27 +1810,27 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                   }
                }
             }
-            lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, "");
-            ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, "");
+            lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
+            ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-               ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi, "");
-               lod_fparts = LLVMBuildExtractElement(builder, lod_fpart, indexi, "");
+               ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
+               lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
             }
 
             if (use_aos) {
                /* do sampling/filtering with fixed pt arithmetic */
                lp_build_sample_aos(&bld4, sampler_index,
                                    s4, t4, r4, offsets4,
-                                   lod_iparts, lod_fparts,
-                                   ilevel0s, ilevel1s,
+                                   lod_ipart4, lod_fpart4,
+                                   ilevel04, ilevel14,
                                    texelout4);
             }
 
             else {
                lp_build_sample_general(&bld4, sampler_index,
                                        s4, t4, r4, offsets4,
-                                       lod_iparts, lod_fparts,
-                                       ilevel0s, ilevel1s,
+                                       lod_ipart4, lod_fpart4,
+                                       ilevel04, ilevel14,
                                        texelout4);
             }
             for (j = 0; j < 4; j++) {
@@ -1864,6 +1897,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
    lp_build_context_init(&bld_int_vec, gallivm, lp_type_int_vec(32, 128));
 
    if (explicit_lod) {
+      /* FIXME: this needs to honor per-element lod */
       lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
       first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
       lod = lp_build_broadcast_scalar(&bld_int_vec,
-- 
1.7.9.5


More information about the mesa-dev mailing list