[Mesa-dev] [PATCH 1/2] gallivm: refactor num_lods handling

Wed Aug 28 13:59:14 PDT 2013

LGTM.

Jose

----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
> 
> This is just preparation for per-pixel (or per-quad in case of multiple
> quads) min/mag filter selection, since some assumptions about the number of
> miplevels being equal to the number of lods no longer hold true.
> This change does not change behavior yet (though theoretically, when forcing
> the per-element path, it might be slower with different min/mag filters,
> since the code will now respect this setting even when there are no mip maps
> in this case, so some lod calcs will be done per-element even though
> ultimately the same filter is still used for all pixels).
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  126 +++++++++---------
>  src/gallium/auxiliary/gallivm/lp_bld_sample.h     |   13 +-
>  src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |   20 +--
>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  141 ++++++++++++---------
>  4 files changed, 169 insertions(+), 131 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index 89d7249..e1cfd78 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -217,7 +217,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>     struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
>     struct lp_build_context *float_bld = &bld->float_bld;
>     struct lp_build_context *coord_bld = &bld->coord_bld;
> -   struct lp_build_context *levelf_bld = &bld->levelf_bld;
> +   struct lp_build_context *rho_bld = &bld->lodf_bld;
>     const unsigned dims = bld->dims;
>     LLVMValueRef ddx_ddy[2];
>     LLVMBuilderRef builder = bld->gallivm->builder;
> @@ -231,7 +231,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>     LLVMValueRef first_level, first_level_vec;
>     unsigned length = coord_bld->type.length;
>     unsigned num_quads = length / 4;
> -   boolean rho_per_quad = levelf_bld->type.length != length;
> +   boolean rho_per_quad = rho_bld->type.length != length;
>     unsigned i;
>     LLVMValueRef i32undef =
>     LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
>     LLVMValueRef rho_xvec, rho_yvec;
> @@ -259,18 +259,18 @@ lp_build_rho(struct lp_build_sample_context *bld,
>         */
>        if (rho_per_quad) {
>           rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                         levelf_bld->type, cube_rho, 0);
> +                                         rho_bld->type, cube_rho, 0);
>        }
>        else {
>           rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
>        }
>        if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
> -         rho = lp_build_sqrt(levelf_bld, rho);
> +         rho = lp_build_sqrt(rho_bld, rho);
>        }
>        /* Could optimize this for single quad just skip the broadcast */
>        cubesize = lp_build_extract_broadcast(gallivm,
>        bld->float_size_in_type,
> -                                            levelf_bld->type, float_size,
> index0);
> -      rho = lp_build_mul(levelf_bld, cubesize, rho);
> +                                            rho_bld->type, float_size,
> index0);
> +      rho = lp_build_mul(rho_bld, cubesize, rho);
>     }
>     else if (derivs && !(bld->static_texture_state->target ==
>     PIPE_TEXTURE_CUBE)) {
>        LLVMValueRef ddmax[3], ddx[3], ddy[3];
> @@ -311,9 +311,9 @@ lp_build_rho(struct lp_build_sample_context *bld,
>               * otherwise would also need different code to per-pixel lod
>               case.
>               */
>              rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                            levelf_bld->type, rho, 0);
> +                                            rho_bld->type, rho, 0);
>           }
> -         rho = lp_build_sqrt(levelf_bld, rho);
> +         rho = lp_build_sqrt(rho_bld, rho);
>  
>        }
>        else {
> @@ -329,7 +329,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>               * rho_vec contains per-pixel rho, convert to scalar per quad.
>               */
>              rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                            levelf_bld->type, rho, 0);
> +                                            rho_bld->type, rho, 0);
>           }
>        }
>     }
> @@ -404,7 +404,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>  
>           if (rho_per_quad) {
>              rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                            levelf_bld->type, rho, 0);
> +                                            rho_bld->type, rho, 0);
>           }
>           else {
>              /*
> @@ -416,7 +416,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>               */
>              rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
>           }
> -         rho = lp_build_sqrt(levelf_bld, rho);
> +         rho = lp_build_sqrt(rho_bld, rho);
>        }
>        else {
>           ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
> @@ -497,7 +497,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>              }
>              if (rho_per_quad) {
>                 rho = lp_build_pack_aos_scalars(bld->gallivm,
>                 coord_bld->type,
> -                                               levelf_bld->type, rho, 0);
> +                                               rho_bld->type, rho, 0);
>              }
>              else {
>                 rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
> @@ -528,7 +528,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>                 }
>              }
>              if (!rho_per_quad) {
> -               rho = lp_build_broadcast_scalar(levelf_bld, rho);
> +               rho = lp_build_broadcast_scalar(rho_bld, rho);
>              }
>           }
>        }
> @@ -675,8 +675,7 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
>   * \param out_lod_fpart  float part of lod (never larger than 1 but may be
>   negative)
>   * \param out_lod_positive  (mask) if lod is positive (i.e. texture is
>   minified)
>   *
> - * The resulting lod is scalar per quad, so only the first value per quad
> - * passed in from lod_bias, explicit_lod is used.
> + * The resulting lod can be scalar per quad or be per element.
>   */
>  void
>  lp_build_lod_selector(struct lp_build_sample_context *bld,
> @@ -696,12 +695,12 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>  
>  {
>     LLVMBuilderRef builder = bld->gallivm->builder;
> -   struct lp_build_context *levelf_bld = &bld->levelf_bld;
> +   struct lp_build_context *lodf_bld = &bld->lodf_bld;
>     LLVMValueRef lod;
>  
> -   *out_lod_ipart = bld->leveli_bld.zero;
> -   *out_lod_positive = bld->leveli_bld.zero;
> -   *out_lod_fpart = levelf_bld->zero;
> +   *out_lod_ipart = bld->lodi_bld.zero;
> +   *out_lod_positive = bld->lodi_bld.zero;
> +   *out_lod_fpart = lodf_bld->zero;
>  
>     /*
>      * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture
>      Magnification:
> @@ -729,13 +728,13 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>           bld->dynamic_state->min_lod(bld->dynamic_state,
>                                       bld->gallivm, sampler_unit);
>  
> -      lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
> +      lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
>     }
>     else {
>        if (explicit_lod) {
>           if (bld->num_lods != bld->coord_type.length)
>              lod = lp_build_pack_aos_scalars(bld->gallivm,
>              bld->coord_bld.type,
> -                                            levelf_bld->type, explicit_lod,
> 0);
> +                                            lodf_bld->type, explicit_lod,
> 0);
>           else
>              lod = explicit_lod;
>        }
> @@ -764,33 +763,33 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>                  * Don't actually need both all the time, ipart is needed
>                  * for nearest mipfilter, pos_or_zero if min != mag.
>                  */
> -               *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
> -               *out_lod_positive = lp_build_cmp(levelf_bld,
> PIPE_FUNC_GREATER,
> -                                                rho, levelf_bld->one);
> +               *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
> +               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> +                                                rho, lodf_bld->one);
>                 return;
>              }
>              if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
>                  !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> -               lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
> +               lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
>                                        out_lod_ipart, out_lod_fpart);
> -               *out_lod_positive = lp_build_cmp(levelf_bld,
> PIPE_FUNC_GREATER,
> -                                                rho, levelf_bld->one);
> +               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> +                                                rho, lodf_bld->one);
>                 return;
>              }
>           }
>  
>           if (0) {
> -            lod = lp_build_log2(levelf_bld, rho);
> +            lod = lp_build_log2(lodf_bld, rho);
>           }
>           else {
> -            lod = lp_build_fast_log2(levelf_bld, rho);
> +            lod = lp_build_fast_log2(lodf_bld, rho);
>           }
>  
>           /* add shader lod bias */
>           if (lod_bias) {
>              if (bld->num_lods != bld->coord_type.length)
>                 lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
>                 bld->coord_bld.type,
> -                                                    levelf_bld->type,
> lod_bias, 0);
> +                                                    lodf_bld->type,
> lod_bias, 0);
>              lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
>           }
>        }
> @@ -800,7 +799,7 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>           LLVMValueRef sampler_lod_bias =
>              bld->dynamic_state->lod_bias(bld->dynamic_state,
>                                           bld->gallivm, sampler_unit);
> -         sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
> +         sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
>                                                        sampler_lod_bias);
>           lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias,
>           "sampler_lod_bias");
>        }
> @@ -810,36 +809,36 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>           LLVMValueRef max_lod =
>              bld->dynamic_state->max_lod(bld->dynamic_state,
>                                          bld->gallivm, sampler_unit);
> -         max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
> +         max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
>  
> -         lod = lp_build_min(levelf_bld, lod, max_lod);
> +         lod = lp_build_min(lodf_bld, lod, max_lod);
>        }
>        if (bld->static_sampler_state->apply_min_lod) {
>           LLVMValueRef min_lod =
>              bld->dynamic_state->min_lod(bld->dynamic_state,
>                                          bld->gallivm, sampler_unit);
> -         min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
> +         min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
>  
> -         lod = lp_build_max(levelf_bld, lod, min_lod);
> +         lod = lp_build_max(lodf_bld, lod, min_lod);
>        }
>     }
>  
> -   *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
> -                                    lod, levelf_bld->zero);
> +   *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> +                                    lod, lodf_bld->zero);
>  
>     if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
>        if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> -         lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
> +         lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
>                                  out_lod_ipart, out_lod_fpart);
>        }
>        else {
> -         lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart,
> out_lod_fpart);
> +         lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
>        }
>  
>        lp_build_name(*out_lod_fpart, "lod_fpart");
>     }
>     else {
> -      *out_lod_ipart = lp_build_iround(levelf_bld, lod);
> +      *out_lod_ipart = lp_build_iround(lodf_bld, lod);
>     }
>  
>     lp_build_name(*out_lod_ipart, "lod_ipart");
> @@ -880,14 +879,14 @@ lp_build_nearest_mip_level(struct
> lp_build_sample_context *bld,
>        out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
>        out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
>        out = lp_build_or(leveli_bld, out, out1);
> -      if (bld->num_lods == bld->coord_bld.type.length) {
> +      if (bld->num_mips == bld->coord_bld.type.length) {
>           *out_of_bounds = out;
>        }
> -      else if (bld->num_lods == 1) {
> +      else if (bld->num_mips == 1) {
>           *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld,
>           out);
>        }
>        else {
> -         assert(bld->num_lods == bld->coord_bld.type.length / 4);
> +         assert(bld->num_mips == bld->coord_bld.type.length / 4);
>           *out_of_bounds =
>           lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
>                                                                  leveli_bld->type,
>                                                                  bld->int_coord_bld.type,
> @@ -904,8 +903,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context
> *bld,
>  
>  
>  /**
> - * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two
> (per-quad)
> - * (adjacent) mipmap level indexes, and fix up float lod part accordingly.
> + * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int
> LOD(s)
> + * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
> + * part accordingly.
>   * Later, we'll sample from those two mipmap levels and interpolate between
>   them.
>   */
>  void
> @@ -923,6 +923,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
>     LLVMValueRef clamp_min;
>     LLVMValueRef clamp_max;
>  
> +   assert(bld->num_lods == bld->num_mips);
> +
>     first_level = bld->dynamic_state->first_level(bld->dynamic_state,
>                                                   bld->gallivm,
>                                                   texture_unit);
>     last_level = bld->dynamic_state->last_level(bld->dynamic_state,
> @@ -1013,17 +1015,17 @@ lp_build_get_mip_offsets(struct
> lp_build_sample_context *bld,
>     LLVMValueRef indexes[2], offsets, offset1;
>  
>     indexes[0] = lp_build_const_int32(bld->gallivm, 0);
> -   if (bld->num_lods == 1) {
> +   if (bld->num_mips == 1) {
>        indexes[1] = level;
>        offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
>        offset1 = LLVMBuildLoad(builder, offset1, "");
>        offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
>     }
> -   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
> +   else if (bld->num_mips == bld->coord_bld.type.length / 4) {
>        unsigned i;
>  
>        offsets = bld->int_coord_bld.undef;
> -      for (i = 0; i < bld->num_lods; i++) {
> +      for (i = 0; i < bld->num_mips; i++) {
>           LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
>           LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
>           indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
> @@ -1036,10 +1038,10 @@ lp_build_get_mip_offsets(struct
> lp_build_sample_context *bld,
>     else {
>        unsigned i;
>  
> -      assert (bld->num_lods == bld->coord_bld.type.length);
> +      assert (bld->num_mips == bld->coord_bld.type.length);
>  
>        offsets = bld->int_coord_bld.undef;
> -      for (i = 0; i < bld->num_lods; i++) {
> +      for (i = 0; i < bld->num_mips; i++) {
>           LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
>           indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
>           offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
> @@ -1089,18 +1091,18 @@ lp_build_get_level_stride_vec(struct
> lp_build_sample_context *bld,
>     LLVMBuilderRef builder = bld->gallivm->builder;
>     LLVMValueRef indexes[2], stride, stride1;
>     indexes[0] = lp_build_const_int32(bld->gallivm, 0);
> -   if (bld->num_lods == 1) {
> +   if (bld->num_mips == 1) {
>        indexes[1] = level;
>        stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
>        stride1 = LLVMBuildLoad(builder, stride1, "");
>        stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
>     }
> -   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
> +   else if (bld->num_mips == bld->coord_bld.type.length / 4) {
>        LLVMValueRef stride1;
>        unsigned i;
>  
>        stride = bld->int_coord_bld.undef;
> -      for (i = 0; i < bld->num_lods; i++) {
> +      for (i = 0; i < bld->num_mips; i++) {
>           LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
>           LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
>           indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
> @@ -1114,7 +1116,7 @@ lp_build_get_level_stride_vec(struct
> lp_build_sample_context *bld,
>        LLVMValueRef stride1;
>        unsigned i;
>  
> -      assert (bld->num_lods == bld->coord_bld.type.length);
> +      assert (bld->num_mips == bld->coord_bld.type.length);
>  
>        stride = bld->int_coord_bld.undef;
>        for (i = 0; i < bld->coord_bld.type.length; i++) {
> @@ -1147,7 +1149,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
>     /*
>      * Compute width, height, depth at mipmap level 'ilevel'
>      */
> -   if (bld->num_lods == 1) {
> +   if (bld->num_mips == 1) {
>        ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
>        *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size,
>        ilevel_vec);
>     }
> @@ -1157,7 +1159,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
>        unsigned num_quads = bld->coord_bld.type.length / 4;
>        unsigned i;
>  
> -      if (bld->num_lods == num_quads) {
> +      if (bld->num_mips == num_quads) {
>           /*
>            * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
>            * intel "forgot" the variable shift count instruction until avx2.
> @@ -1216,7 +1218,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
>           * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
>           * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...]
>           vector.
>           */
> -         assert(bld->num_lods == bld->coord_bld.type.length);
> +         assert(bld->num_mips == bld->coord_bld.type.length);
>           if (bld->dims == 1) {
>              assert(bld->int_size_in_bld.type.length == 1);
>              int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
> @@ -1226,7 +1228,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
>           }
>           else {
>              LLVMValueRef ilevel1;
> -            for (i = 0; i < bld->num_lods; i++) {
> +            for (i = 0; i < bld->num_mips; i++) {
>                 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
>                 ilevel1 = lp_build_extract_broadcast(bld->gallivm,
>                 bld->int_coord_type,
>                                                      bld->int_size_in_bld.type,
>                                                      ilevel, indexi);
> @@ -1235,7 +1237,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
>              }
>              *out_size = lp_build_concat(bld->gallivm, tmp,
>                                          bld->int_size_in_bld.type,
> -                                        bld->num_lods);
> +                                        bld->num_mips);
>           }
>        }
>     }
> @@ -1278,7 +1280,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
>     LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
>     struct lp_type size_type = size_bld->type;
>  
> -   if (bld->num_lods == 1) {
> +   if (bld->num_mips == 1) {
>        *out_width = lp_build_extract_broadcast(bld->gallivm,
>                                                size_type,
>                                                coord_type,
> @@ -1305,7 +1307,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
>        if (dims == 1) {
>           *out_width = size;
>        }
> -      else if (bld->num_lods == num_quads) {
> +      else if (bld->num_mips == num_quads) {
>           *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
>           if (dims >= 2) {
>              *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
> @@ -1315,7 +1317,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
>           }
>        }
>        else {
> -         assert(bld->num_lods == bld->coord_type.length);
> +         assert(bld->num_mips == bld->coord_type.length);
>           *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
>                                                  coord_type, size, 0);
>           if (dims >= 2) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index a7ebe7e..e6b9f30 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -233,7 +233,10 @@ struct lp_build_sample_context
>     /** SIMD vector width */
>     unsigned vector_width;
>  
> -   /** number of lod values (valid are 1, length/4, length) */
> +   /** number of mipmaps (valid are 1, length/4, length) */
> +   unsigned num_mips;
> +
> +   /** number of lod values (valid are 1, length/4, length) */
>     unsigned num_lods;
>  
>     /** regular scalar float type */
> @@ -283,6 +286,14 @@ struct lp_build_sample_context
>     struct lp_type leveli_type;
>     struct lp_build_context leveli_bld;
>  
> +   /** Float lod type */
> +   struct lp_type lodf_type;
> +   struct lp_build_context lodf_bld;
> +
> +   /** Int lod type */
> +   struct lp_type lodi_type;
> +   struct lp_build_context lodi_bld;
> +
>     /* Common dynamic state values */
>     LLVMValueRef row_stride_array;
>     LLVMValueRef img_stride_array;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> index 7431388..c35b628 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> @@ -1373,7 +1373,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>     lp_build_mipmap_level_sizes(bld, ilevel0,
>                                 &size0,
>                                 &row_stride0_vec, &img_stride0_vec);
> -   if (bld->num_lods == 1) {
> +   if (bld->num_mips == 1) {
>        data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
>     }
>     else {
> @@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>  
>     if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
>        LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
> -                                                     bld->levelf_bld.type,
> 256.0);
> -      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
> +                                                     bld->lodf_bld.type,
> 256.0);
> +      LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
>        struct lp_build_if_state if_ctx;
>        LLVMValueRef need_lerp;
>        unsigned num_quads = bld->coord_bld.type.length / 4;
> @@ -1435,7 +1435,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>        /* need_lerp = lod_fpart > 0 */
>        if (bld->num_lods == 1) {
>           need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
> -                                   lod_fpart, bld->leveli_bld.zero,
> +                                   lod_fpart, bld->lodi_bld.zero,
>                                     "need_lerp");
>        }
>        else {
> @@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>            * lod_fpart values have same sign.
>            * We can however then skip the greater than comparison.
>            */
> -         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
> -                                  bld->leveli_bld.zero);
> -         need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, lod_fpart);
> +         lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
> +                                  bld->lodi_bld.zero);
> +         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
> lod_fpart);
>        }
>  
>        lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> @@ -1465,7 +1465,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>           lp_build_mipmap_level_sizes(bld, ilevel1,
>                                       &size1,
>                                       &row_stride1_vec, &img_stride1_vec);
> -         if (bld->num_lods == 1) {
> +         if (bld->num_mips == 1) {
>              data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
>           }
>           else {
> @@ -1524,7 +1524,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>           }
>           else {
>              unsigned num_chans_per_lod = 4 * bld->coord_type.length /
>              bld->num_lods;
> -            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->leveli_bld.type.length);
> +            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->lodi_bld.type.length);
>              LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
>  
>              /* Take the LSB of lod_fpart */
> @@ -1613,7 +1613,7 @@ lp_build_sample_aos(struct lp_build_sample_context
> *bld,
>         * some max probably could hack up the weights in the linear
>         * path with selects to work for nearest.
>         */
> -      if (bld->leveli_bld.type.length > 1)
> +      if (bld->num_lods > 1)
>           lod_positive = LLVMBuildExtractElement(builder, lod_positive,
>                                                  lp_build_const_int32(bld->gallivm,
>                                                  0), "");
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 8ad3b9f..c686d82 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1087,7 +1087,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>     lp_build_mipmap_level_sizes(bld, ilevel0,
>                                 &size0,
>                                 &row_stride0_vec, &img_stride0_vec);
> -   if (bld->num_lods == 1) {
> +   if (bld->num_mips == 1) {
>        data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
>     }
>     else {
> @@ -1123,7 +1123,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>        /* need_lerp = lod_fpart > 0 */
>        if (bld->num_lods == 1) {
>           need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
> -                                   lod_fpart, bld->levelf_bld.zero,
> +                                   lod_fpart, bld->lodf_bld.zero,
>                                     "need_lerp");
>        }
>        else {
> @@ -1138,12 +1138,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>            * negative values which would screw up filtering if not all
>            * lod_fpart values have same sign.
>            */
> -         lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
> -                                  bld->levelf_bld.zero);
> -         need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
> +         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
> +                                  bld->lodf_bld.zero);
> +         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
>                                        PIPE_FUNC_GREATER,
> -                                      lod_fpart, bld->levelf_bld.zero);
> -         need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, need_lerp);
> +                                      lod_fpart, bld->lodf_bld.zero);
> +         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
> need_lerp);
>        }
>  
>        lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> @@ -1152,7 +1152,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>           lp_build_mipmap_level_sizes(bld, ilevel1,
>                                       &size1,
>                                       &row_stride1_vec, &img_stride1_vec);
> -         if (bld->num_lods == 1) {
> +         if (bld->num_mips == 1) {
>              data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
>           }
>           else {
> @@ -1178,7 +1178,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>  
>           if (bld->num_lods != bld->coord_type.length)
>              lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
> -
> bld->levelf_bld.type,
> +
> bld->lodf_bld.type,
>                                                                bld->texel_bld.type,
>                                                                lod_fpart);
>  
> @@ -1312,8 +1312,14 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
>                              mip_filter,
>                              &lod_ipart, lod_fpart, lod_pos_or_zero);
>     } else {
> -      lod_ipart = bld->leveli_bld.zero;
> -      *lod_pos_or_zero = bld->leveli_bld.zero;
> +      lod_ipart = bld->lodi_bld.zero;
> +      *lod_pos_or_zero = bld->lodi_bld.zero;
> +   }
> +
> +   if (bld->num_lods != bld->num_mips) {
> +      /* only makes sense if there's just a single mip level */
> +      assert(bld->num_mips == 1);
> +      lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
>     }
>  
>     /*
> @@ -1641,7 +1647,7 @@ lp_build_sample_general(struct lp_build_sample_context
> *bld,
>         * some max probably could hack up the weights in the linear
>         * path with selects to work for nearest.
>         */
> -      if (bld->leveli_bld.type.length > 1)
> +      if (bld->num_lods > 1)
>           lod_positive = LLVMBuildExtractElement(builder, lod_positive,
>                                                  lp_build_const_int32(bld->gallivm,
>                                                  0), "");
>  
> @@ -1692,7 +1698,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
>                       const LLVMValueRef *offsets,
>                       LLVMValueRef *colors_out)
>  {
> -   struct lp_build_context *perquadi_bld = &bld->leveli_bld;
> +   struct lp_build_context *perquadi_bld = &bld->lodi_bld;
>     struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
>     unsigned dims = bld->dims, chan;
>     unsigned target = bld->static_texture_state->target;
> @@ -1706,7 +1712,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
>     out_of_bounds = int_coord_bld->zero;
>  
>     if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
> -      if (bld->num_lods != int_coord_bld->type.length) {
> +      if (bld->num_mips != int_coord_bld->type.length) {
>           ilevel = lp_build_pack_aos_scalars(bld->gallivm,
>           int_coord_bld->type,
>                                              perquadi_bld->type,
>                                              explicit_lod, 0);
>        }
> @@ -1717,7 +1723,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
>                                   out_of_bound_ret_zero ? &out_of_bounds :
>                                   NULL);
>     }
>     else {
> -      assert(bld->num_lods == 1);
> +      assert(bld->num_mips == 1);
>        if (bld->static_texture_state->target != PIPE_BUFFER) {
>           ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
>                                                    bld->gallivm,
>                                                    texture_unit);
> @@ -1856,7 +1862,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>     unsigned target = static_texture_state->target;
>     unsigned dims = texture_dims(target);
>     unsigned num_quads = type.length / 4;
> -   unsigned mip_filter, i;
> +   unsigned mip_filter, min_img_filter, mag_img_filter, i;
>     struct lp_build_sample_context bld;
>     struct lp_static_sampler_state derived_sampler_state =
>     *static_sampler_state;
>     LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
> @@ -1919,6 +1925,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>        debug_printf("  .min_mip_filter = %u\n",
>        derived_sampler_state.min_mip_filter);
>     }
>  
> +   min_img_filter = static_sampler_state->min_img_filter;
> +   mag_img_filter = static_sampler_state->mag_img_filter;
> +
> +
>     /*
>      * This is all a bit complicated different paths are chosen for performance
>      * reasons.
> @@ -1936,38 +1946,51 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>     /*
>      * There are other situations where at least the multiple int lods could be
>      * avoided like min and max lod being equal.
> -    * XXX if num_lods == 1 (for multiple quads) the level bld contexts will still
> -    * have length 4. Because lod_selector is always using per quad calcs in this
> -    * case, but minification etc. don't need to bother. This is very brittle though
> -    * e.g. num_lods might be 1 but still have multiple positive_lod values!
>      */
> +   bld.num_mips = bld.num_lods = 1;
>     if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
>         (explicit_lod || lod_bias ||
> -        (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
> -       ((is_fetch && target != PIPE_BUFFER) ||
> -        (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> -      bld.num_lods = type.length;
> +        (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
> +      if ((is_fetch && target != PIPE_BUFFER) ||
> +          (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> +         bld.num_mips = type.length;
> +         bld.num_lods = type.length;
> +      }
> +      else if (!is_fetch && min_img_filter != mag_img_filter) {
> +         bld.num_mips = 1;
> +         bld.num_lods = type.length;
> +      }
> +   }
>     /* TODO: for true scalar_lod should only use 1 lod value */
> -   else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
> +   else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
>              (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> +      bld.num_mips = num_quads;
>        bld.num_lods = num_quads;
>     }
> -   else {
> -      bld.num_lods = 1;
> +   else if (!is_fetch && min_img_filter != mag_img_filter) {
> +      bld.num_mips = 1;
> +      bld.num_lods = num_quads;
>     }
>  
> -   bld.levelf_type = type;
> +
> +   bld.lodf_type = type;
>     /* we want native vector size to be able to use our intrinsics */
>     if (bld.num_lods != type.length) {
> -      bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
> +      /* TODO: this currently always has to be per-quad or per-element */
> +      bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
> +   }
> +   bld.lodi_type = lp_int_type(bld.lodf_type);
> +   bld.levelf_type = bld.lodf_type;
> +   if (bld.num_mips == 1) {
> +      bld.levelf_type.length = 1;
>     }
>     bld.leveli_type = lp_int_type(bld.levelf_type);
>     bld.float_size_type = bld.float_size_in_type;
>     /* Note: size vectors may not be native. They contain minified w/h/d/_
>     values,
>      * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32
>      */
> -   if (bld.num_lods > 1) {
> -      bld.float_size_type.length = bld.num_lods == type.length ?
> -                                      bld.num_lods * bld.float_size_in_type.length :
> +   if (bld.num_mips > 1) {
> +      bld.float_size_type.length = bld.num_mips == type.length ?
> +                                      bld.num_mips * bld.float_size_in_type.length :
>                                        type.length;
>     }
>     bld.int_size_type = lp_int_type(bld.float_size_type);
> @@ -1984,6 +2007,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>     lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
>     lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
>     lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
> +   lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
> +   lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
>  
>     /* Get the dynamic state */
>     tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
> @@ -2071,16 +2096,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>         * (It should be faster if we'd support avx2)
>         */
>        if (num_quads == 1 || !use_aos) {
> -
> -         if (num_quads > 1) {
> -            if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
> -               LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
> -               /*
> -                * This parameter is the same for all quads could probably
> simplify.
> -                */
> -               ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0,
> "");
> -            }
> -         }
>           if (use_aos) {
>              /* do sampling/filtering with fixed pt arithmetic */
>              lp_build_sample_aos(&bld, sampler_index,
> @@ -2134,30 +2149,37 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>           bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
>           bld4.texel_type = bld.texel_type;
>           bld4.texel_type.length = 4;
> -         bld4.levelf_type = type4;
> -         /* we want native vector size to be able to use our intrinsics */
> -         bld4.levelf_type.length = 1;
> -         bld4.leveli_type = lp_int_type(bld4.levelf_type);
>  
> +         bld4.num_mips = bld4.num_lods = 1;
>           if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> -               (explicit_lod || lod_bias ||
> -                (derivs && static_texture_state->target !=
> PIPE_TEXTURE_CUBE)) &&
> -               ((is_fetch && target != PIPE_BUFFER) ||
> -                (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> -            bld4.num_lods = type4.length;
> -         else
> -            bld4.num_lods = 1;
> +             (explicit_lod || lod_bias ||
> +              (derivs && static_texture_state->target !=
> PIPE_TEXTURE_CUBE))) {
> +            if ((is_fetch && target != PIPE_BUFFER) ||
> +                (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> +               bld4.num_mips = type4.length;
> +               bld4.num_lods = type4.length;
> +            }
> +            else if (!is_fetch && min_img_filter != mag_img_filter) {
> +               bld4.num_mips = 1;
> +               bld4.num_lods = type4.length;
> +            }
> +         }
>  
> -         bld4.levelf_type = type4;
>           /* we want native vector size to be able to use our intrinsics */
> +         bld4.lodf_type = type4;
>           if (bld4.num_lods != type4.length) {
> +            bld4.lodf_type.length = 1;
> +         }
> +         bld4.lodi_type = lp_int_type(bld4.lodf_type);
> +         bld4.levelf_type = type4;
> +         if (bld4.num_mips != type4.length) {
>              bld4.levelf_type.length = 1;
>           }
>           bld4.leveli_type = lp_int_type(bld4.levelf_type);
>           bld4.float_size_type = bld4.float_size_in_type;
> -         if (bld4.num_lods > 1) {
> -            bld4.float_size_type.length = bld4.num_lods == type4.length ?
> -                                            bld4.num_lods *
> bld4.float_size_in_type.length :
> +         if (bld4.num_mips > 1) {
> +            bld4.float_size_type.length = bld4.num_mips == type4.length ?
> +                                            bld4.num_mips *
> bld4.float_size_in_type.length :
>                                              type4.length;
>           }
>           bld4.int_size_type = lp_int_type(bld4.float_size_type);
> @@ -2174,6 +2196,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>           lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
>           lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
>           lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
> +         lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
> +         lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
>  
>           for (i = 0; i < num_quads; i++) {
>              LLVMValueRef s4, t4, r4;
> @@ -2196,7 +2220,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>                 }
>              }
>              lod_positive4 = lp_build_extract_range(gallivm, lod_positive,
>              num_lods * i, num_lods);
> -            ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
> +            ilevel04 = bld.num_mips == 1 ? ilevel0 :
> +                          lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
>              if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
>                 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods
>                 * i, num_lods);
>                 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart,
>                 num_lods * i, num_lods);
> --
> 1.7.9.5
>