[Mesa-dev] [PATCH] gallivm: do per-pixel lod calculations for explicit lod

Wed Jul 3 08:28:10 PDT 2013

I don't fully grasp every detail (many paths), but it looks good in principle.

Where do the 16xf32 vectors come from?

Also, please add a comment somewhere summarizing all the code paths for lod handling:

 - AVX vs non AVX
 - SOA vs AOS
 - scalar lod vs stamp lod

But I couldn't spot anything wrong.

Jose

----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
> 
> d3d10 requires per-pixel lod calculations for explicit lod, lod bias and
> explicit derivatives, and we should probably do it for OpenGL too - at least
> if they are used from vertex or geometry shaders (so this doesn't apply to
> lod bias), where it doesn't just affect neighboring pixels.
> Some code was already there to handle this so fix it up and enable it.
> There will no doubt be a performance hit, unfortunately; we could do better
> if we knew we had a real vector shift instruction (with variable shift
> count), but this requires AVX2 on x86 (or an AMD Bulldozer family cpu).
> Don't do anything for lod bias and explicit derivatives yet, though
> no special magic should be needed for them either.
> Likewise, the size query is still broken just the same.
> 
> v2: Use information on whether lod is a (broadcast) scalar or not. The idea
> would be to base this on the actual value; for now just pretend it's a scalar
> in fs and not a scalar otherwise (so, per-pixel lod is only used in gs/vs but
> the same code is generated for fs as before).
> ---
>  src/gallium/auxiliary/draw/draw_llvm_sample.c     |    3 +-
>  src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  110 ++++++++---------
>  src/gallium/auxiliary/gallivm/lp_bld_sample.h     |   13 ++-
>  src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c |   26 ++---
>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  130
>  +++++++++++++--------
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.h       |    1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c   |   18 ++-
>  src/gallium/auxiliary/tgsi/tgsi_scan.c            |    1 +
>  src/gallium/auxiliary/tgsi/tgsi_scan.h            |    2 +
>  src/gallium/drivers/llvmpipe/lp_tex_sample.c      |    3 +-
>  10 files changed, 181 insertions(+), 126 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c
> b/src/gallium/auxiliary/draw/draw_llvm_sample.c
> index e51e011..0cb5c21 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
> @@ -238,6 +238,7 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct
> lp_build_sampler_soa *base,
>                                         const struct lp_derivatives *derivs,
>                                         LLVMValueRef lod_bias, /* optional */
>                                         LLVMValueRef explicit_lod, /*
>                                         optional */
> +                                       boolean scalar_lod,
>                                         LLVMValueRef *texel)
>  {
>     struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa
>     *)base;
> @@ -256,7 +257,7 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct
> lp_build_sampler_soa *base,
>                         coords,
>                         offsets,
>                         derivs,
> -                       lod_bias, explicit_lod,
> +                       lod_bias, explicit_lod, scalar_lod,
>                         texel);
>  }
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index d689c7b..c2efec9 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -215,7 +215,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>     struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
>     struct lp_build_context *float_bld = &bld->float_bld;
>     struct lp_build_context *coord_bld = &bld->coord_bld;
> -   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
> +   struct lp_build_context *levelf_bld = &bld->levelf_bld;
>     const unsigned dims = bld->dims;
>     LLVMValueRef ddx_ddy[2];
>     LLVMBuilderRef builder = bld->gallivm->builder;
> @@ -235,6 +235,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
>  
>     /* Note that all simplified calculations will only work for isotropic
>     filtering */
>  
> +   assert(bld->num_lods != length);
> +
>     first_level = bld->dynamic_state->first_level(bld->dynamic_state,
>                                                   bld->gallivm,
>                                                   texture_unit);
>     first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
> @@ -248,14 +250,14 @@ lp_build_rho(struct lp_build_sample_context *bld,
>         * Cube map code did already everything except size mul and per-quad
>         extraction.
>         */
>        rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                      perquadf_bld->type, cube_rho, 0);
> +                                      levelf_bld->type, cube_rho, 0);
>        if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
> -         rho = lp_build_sqrt(perquadf_bld, rho);
> +         rho = lp_build_sqrt(levelf_bld, rho);
>        }
>        /* Could optimize this for single quad just skip the broadcast */
>        cubesize = lp_build_extract_broadcast(gallivm,
>        bld->float_size_in_type,
> -                                            perquadf_bld->type, float_size,
> index0);
> -      rho = lp_build_mul(perquadf_bld, cubesize, rho);
> +                                            levelf_bld->type, float_size,
> index0);
> +      rho = lp_build_mul(levelf_bld, cubesize, rho);
>     }
>     else if (derivs && !(bld->static_texture_state->target ==
>     PIPE_TEXTURE_CUBE)) {
>        LLVMValueRef ddmax[3], ddx[3], ddy[3];
> @@ -289,12 +291,12 @@ lp_build_rho(struct lp_build_sample_context *bld,
>           }
>           rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
>           rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                         perquadf_bld->type, rho_vec, 0);
> +                                         levelf_bld->type, rho_vec, 0);
>           /*
>            * note that as long as we don't care about per-pixel lod could
>            reduce math
>            * more (at some shuffle cost), but for now only do sqrt after
>            packing.
>            */
> -         rho = lp_build_sqrt(perquadf_bld, rho);
> +         rho = lp_build_sqrt(levelf_bld, rho);
>        }
>        else {
>           rho_vec = ddmax[0];
> @@ -309,7 +311,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>            * since we can't handle per-pixel rho/lod from now on (TODO).
>            */
>           rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                         perquadf_bld->type, rho_vec, 0);
> +                                         levelf_bld->type, rho_vec, 0);
>        }
>     }
>     else {
> @@ -381,8 +383,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
>           rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
>  
>           rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                         perquadf_bld->type, rho_vec, 0);
> -         rho = lp_build_sqrt(perquadf_bld, rho);
> +                                         levelf_bld->type, rho_vec, 0);
> +         rho = lp_build_sqrt(levelf_bld, rho);
>        }
>        else {
>           ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
> @@ -462,7 +464,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>                 }
>              }
>              rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                            perquadf_bld->type, rho, 0);
> +                                            levelf_bld->type, rho, 0);
>           }
>           else {
>              if (dims <= 1) {
> @@ -652,11 +654,11 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>  
>  {
>     LLVMBuilderRef builder = bld->gallivm->builder;
> -   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
> +   struct lp_build_context *levelf_bld = &bld->levelf_bld;
>     LLVMValueRef lod;
>  
> -   *out_lod_ipart = bld->perquadi_bld.zero;
> -   *out_lod_fpart = perquadf_bld->zero;
> +   *out_lod_ipart = bld->leveli_bld.zero;
> +   *out_lod_fpart = levelf_bld->zero;
>  
>     if (bld->static_sampler_state->min_max_lod_equal) {
>        /* User is forcing sampling from a particular mipmap level.
> @@ -666,12 +668,15 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>           bld->dynamic_state->min_lod(bld->dynamic_state,
>                                       bld->gallivm, sampler_unit);
>  
> -      lod = lp_build_broadcast_scalar(perquadf_bld, min_lod);
> +      lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
>     }
>     else {
>        if (explicit_lod) {
> -         lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
> -                                         perquadf_bld->type, explicit_lod,
> 0);
> +         if (bld->num_lods != bld->coord_type.length)
> +            lod = lp_build_pack_aos_scalars(bld->gallivm,
> bld->coord_bld.type,
> +                                            levelf_bld->type, explicit_lod,
> 0);
> +         else
> +            lod = explicit_lod;
>        }
>        else {
>           LLVMValueRef rho;
> @@ -694,29 +699,29 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>  
>              if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
>                  mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
> -               *out_lod_ipart = lp_build_ilog2(perquadf_bld, rho);
> -               *out_lod_fpart = perquadf_bld->zero;
> +               *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
> +               *out_lod_fpart = levelf_bld->zero;
>                 return;
>              }
>              if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
>                  !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> -               lp_build_brilinear_rho(perquadf_bld, rho, BRILINEAR_FACTOR,
> +               lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
>                                        out_lod_ipart, out_lod_fpart);
>                 return;
>              }
>           }
>  
>           if (0) {
> -            lod = lp_build_log2(perquadf_bld, rho);
> +            lod = lp_build_log2(levelf_bld, rho);
>           }
>           else {
> -            lod = lp_build_fast_log2(perquadf_bld, rho);
> +            lod = lp_build_fast_log2(levelf_bld, rho);
>           }
>  
>           /* add shader lod bias */
>           if (lod_bias) {
>              lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
>              bld->coord_bld.type,
> -                  perquadf_bld->type, lod_bias, 0);
> +                  levelf_bld->type, lod_bias, 0);
>              lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
>           }
>        }
> @@ -726,7 +731,7 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>           LLVMValueRef sampler_lod_bias =
>              bld->dynamic_state->lod_bias(bld->dynamic_state,
>                                           bld->gallivm, sampler_unit);
> -         sampler_lod_bias = lp_build_broadcast_scalar(perquadf_bld,
> +         sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
>                                                        sampler_lod_bias);
>           lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias,
>           "sampler_lod_bias");
>        }
> @@ -736,33 +741,33 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>           LLVMValueRef max_lod =
>              bld->dynamic_state->max_lod(bld->dynamic_state,
>                                          bld->gallivm, sampler_unit);
> -         max_lod = lp_build_broadcast_scalar(perquadf_bld, max_lod);
> +         max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
>  
> -         lod = lp_build_min(perquadf_bld, lod, max_lod);
> +         lod = lp_build_min(levelf_bld, lod, max_lod);
>        }
>        if (bld->static_sampler_state->apply_min_lod) {
>           LLVMValueRef min_lod =
>              bld->dynamic_state->min_lod(bld->dynamic_state,
>                                          bld->gallivm, sampler_unit);
> -         min_lod = lp_build_broadcast_scalar(perquadf_bld, min_lod);
> +         min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
>  
> -         lod = lp_build_max(perquadf_bld, lod, min_lod);
> +         lod = lp_build_max(levelf_bld, lod, min_lod);
>        }
>     }
>  
>     if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
>        if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> -         lp_build_brilinear_lod(perquadf_bld, lod, BRILINEAR_FACTOR,
> +         lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
>                                  out_lod_ipart, out_lod_fpart);
>        }
>        else {
> -         lp_build_ifloor_fract(perquadf_bld, lod, out_lod_ipart,
> out_lod_fpart);
> +         lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart,
> out_lod_fpart);
>        }
>  
>        lp_build_name(*out_lod_fpart, "lod_fpart");
>     }
>     else {
> -      *out_lod_ipart = lp_build_iround(perquadf_bld, lod);
> +      *out_lod_ipart = lp_build_iround(levelf_bld, lod);
>     }
>  
>     lp_build_name(*out_lod_ipart, "lod_ipart");
> @@ -784,20 +789,20 @@ lp_build_nearest_mip_level(struct
> lp_build_sample_context *bld,
>                             LLVMValueRef lod_ipart,
>                             LLVMValueRef *level_out)
>  {
> -   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
> +   struct lp_build_context *leveli_bld = &bld->leveli_bld;
>     LLVMValueRef first_level, last_level, level;
>  
>     first_level = bld->dynamic_state->first_level(bld->dynamic_state,
>                                                   bld->gallivm,
>                                                   texture_unit);
>     last_level = bld->dynamic_state->last_level(bld->dynamic_state,
>                                                 bld->gallivm, texture_unit);
> -   first_level = lp_build_broadcast_scalar(perquadi_bld, first_level);
> -   last_level = lp_build_broadcast_scalar(perquadi_bld, last_level);
> +   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
> +   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
>  
> -   level = lp_build_add(perquadi_bld, lod_ipart, first_level);
> +   level = lp_build_add(leveli_bld, lod_ipart, first_level);
>  
>     /* clamp level to legal range of levels */
> -   *level_out = lp_build_clamp(perquadi_bld, level, first_level,
> last_level);
> +   *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
>  }
>  
>  
> @@ -815,8 +820,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
>                             LLVMValueRef *level1_out)
>  {
>     LLVMBuilderRef builder = bld->gallivm->builder;
> -   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
> -   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
> +   struct lp_build_context *leveli_bld = &bld->leveli_bld;
> +   struct lp_build_context *levelf_bld = &bld->levelf_bld;
>     LLVMValueRef first_level, last_level;
>     LLVMValueRef clamp_min;
>     LLVMValueRef clamp_max;
> @@ -825,11 +830,11 @@ lp_build_linear_mip_levels(struct
> lp_build_sample_context *bld,
>                                                   bld->gallivm,
>                                                   texture_unit);
>     last_level = bld->dynamic_state->last_level(bld->dynamic_state,
>                                                 bld->gallivm, texture_unit);
> -   first_level = lp_build_broadcast_scalar(perquadi_bld, first_level);
> -   last_level = lp_build_broadcast_scalar(perquadi_bld, last_level);
> +   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
> +   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
>  
> -   *level0_out = lp_build_add(perquadi_bld, lod_ipart, first_level);
> -   *level1_out = lp_build_add(perquadi_bld, *level0_out, perquadi_bld->one);
> +   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
> +   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);
>  
>     /*
>      * Clamp both *level0_out and *level1_out to [first_level, last_level],
>      with
> @@ -843,7 +848,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
>      * converting to our lp_bld_logic helpers.
>      */
>  #if HAVE_LLVM < 0x0301
> -   assert(perquadi_bld->type.length == 1);
> +   assert(leveli_bld->type.length == 1);
>  #endif
>  
>     /* *level0_out < first_level */
> @@ -858,7 +863,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
>                                   first_level, *level1_out, "");
>  
>     *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
> -                                      perquadf_bld->zero, *lod_fpart_inout,
> "");
> +                                      levelf_bld->zero, *lod_fpart_inout,
> "");
>  
>     /* *level0_out >= last_level */
>     clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
> @@ -872,7 +877,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
>                                   last_level, *level1_out, "");
>  
>     *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
> -                                      perquadf_bld->zero, *lod_fpart_inout,
> "");
> +                                      levelf_bld->zero, *lod_fpart_inout,
> "");
>  
>     lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
>     lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
> @@ -1087,7 +1092,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
>              LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
>  
>              ileveli = lp_build_extract_broadcast(bld->gallivm,
> -                                                 bld->perquadi_bld.type,
> +                                                 bld->leveli_bld.type,
>                                                   bld4.type,
>                                                   ilevel,
>                                                   indexi);
> @@ -1131,10 +1136,9 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
>                 tmp[i] = bld->int_size;
>                 tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i],
>                 ilevel1);
>              }
> -            int_size_vec = lp_build_concat(bld->gallivm,
> -                                           tmp,
> -                                           bld->int_size_in_bld.type,
> -                                           bld->num_lods);
> +            *out_size = lp_build_concat(bld->gallivm, tmp,
> +                                        bld->int_size_in_bld.type,
> +                                        bld->num_lods);
>           }
>        }
>     }
> @@ -1218,10 +1222,10 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
>           *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
>                                                  coord_type, size, 0);
>           if (dims >= 2) {
> -            *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
> -                                                   coord_type, size, 1);
> +            *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
> +                                                    coord_type, size, 1);
>              if (dims == 3) {
> -               *out_width = lp_build_pack_aos_scalars(bld->gallivm,
> size_type,
> +               *out_depth = lp_build_pack_aos_scalars(bld->gallivm,
> size_type,
>                                                        coord_type, size, 2);
>              }
>           }
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index cde8ce9..a3ecc05 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -268,13 +268,13 @@ struct lp_build_sample_context
>     struct lp_type texel_type;
>     struct lp_build_context texel_bld;
>  
> -   /** Float per-quad type */
> -   struct lp_type perquadf_type;
> -   struct lp_build_context perquadf_bld;
> +   /** Float level type */
> +   struct lp_type levelf_type;
> +   struct lp_build_context levelf_bld;
>  
> -   /** Int per-quad type */
> -   struct lp_type perquadi_type;
> -   struct lp_build_context perquadi_bld;
> +   /** Int level type */
> +   struct lp_type leveli_type;
> +   struct lp_build_context leveli_bld;
>  
>     /* Common dynamic state values */
>     LLVMValueRef row_stride_array;
> @@ -477,6 +477,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>                      const struct lp_derivatives *derivs,
>                      LLVMValueRef lod_bias,
>                      LLVMValueRef explicit_lod,
> +                    boolean scalar_lod,
>                      LLVMValueRef texel_out[4]);
>  
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> index 104c24d..da416aa 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> @@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>  
>     if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
>        LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
> -                                                     bld->perquadf_bld.type,
> 256.0);
> -      LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm,
> bld->perquadi_bld.type);
> +                                                     bld->levelf_bld.type,
> 256.0);
> +      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
>        struct lp_build_if_state if_ctx;
>        LLVMValueRef need_lerp;
>        unsigned num_quads = bld->coord_bld.type.length / 4;
> @@ -1433,9 +1433,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>        lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type,
>        "lod_fpart.fixed16");
>  
>        /* need_lerp = lod_fpart > 0 */
> -      if (num_quads == 1) {
> +      if (bld->num_lods == 1) {
>           need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
> -                                   lod_fpart, bld->perquadi_bld.zero,
> +                                   lod_fpart, bld->leveli_bld.zero,
>                                     "need_lerp");
>        }
>        else {
> @@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>            * lod_fpart values have same sign.
>            * We can however then skip the greater than comparison.
>            */
> -         lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
> -                                  bld->perquadi_bld.zero);
> -         need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads,
> lod_fpart);
> +         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
> +                                  bld->leveli_bld.zero);
> +         need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, lod_fpart);
>        }
>  
>        lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> @@ -1465,9 +1465,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>           lp_build_mipmap_level_sizes(bld, ilevel1,
>                                       &size1,
>                                       &row_stride1_vec, &img_stride1_vec);
> -         lp_build_mipmap_level_sizes(bld, ilevel1,
> -                                     &size1,
> -                                     &row_stride1_vec, &img_stride1_vec);
>           if (bld->num_lods == 1) {
>              data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
>           }
> @@ -1511,7 +1508,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>  
>           /* interpolate samples from the two mipmap levels */
>  
> -         if (num_quads == 1) {
> +         if (num_quads == 1 && bld->num_lods == 1) {
>              lod_fpart = LLVMBuildTrunc(builder, lod_fpart,
>              u8n_bld.elem_type, "");
>              lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
>  
> @@ -1526,17 +1523,16 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>  #endif
>           }
>           else {
> -            const unsigned num_chans_per_quad = 4 * 4;
> -            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->perquadi_bld.type.length);
> +            unsigned num_chans_per_lod = 4 * bld->coord_type.length /
> bld->num_lods;
> +            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->leveli_bld.type.length);
>              LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
>  
>              /* Take the LSB of lod_fpart */
>              lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type,
>              "");
>  
>              /* Broadcast each lod weight into their respective channels */
> -            assert(u8n_bld.type.length == num_quads * num_chans_per_quad);
>              for (i = 0; i < u8n_bld.type.length; ++i) {
> -               shuffle[i] = lp_build_const_int32(bld->gallivm, i /
> num_chans_per_quad);
> +               shuffle[i] = lp_build_const_int32(bld->gallivm, i /
> num_chans_per_lod);
>              }
>              lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart,
>              LLVMGetUndef(tmp_vec_type),
>                                                 LLVMConstVector(shuffle,
>                                                 u8n_bld.type.length), "");
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index e0a59d0..07fa47e 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -979,17 +979,17 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>     if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
>        struct lp_build_if_state if_ctx;
>        LLVMValueRef need_lerp;
> -      unsigned num_quads = bld->coord_bld.type.length / 4;
>  
>        /* need_lerp = lod_fpart > 0 */
> -      if (num_quads == 1) {
> +      if (bld->num_lods == 1) {
>           need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
> -                                   lod_fpart, bld->perquadf_bld.zero,
> +                                   lod_fpart, bld->levelf_bld.zero,
>                                     "need_lerp");
>        }
>        else {
>           /*
> -          * We'll do mip filtering if any of the quads need it.
> +          * We'll do mip filtering if any of the quads (or individual
> +          * pixel in case of per-pixel lod) need it.
>            * It might be better to split the vectors here and only
>            fetch/filter
>            * quads which need it.
>            */
> @@ -998,13 +998,13 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>            * negative values which would screw up filtering if not all
>            * lod_fpart values have same sign.
>            */
> -         lod_fpart = lp_build_max(&bld->perquadf_bld, lod_fpart,
> -                                  bld->perquadf_bld.zero);
> -         need_lerp = lp_build_compare(bld->gallivm, bld->perquadf_bld.type,
> +         lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
> +                                  bld->levelf_bld.zero);
> +         need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
>                                        PIPE_FUNC_GREATER,
> -                                      lod_fpart, bld->perquadf_bld.zero);
> -         need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads,
> need_lerp);
> -     }
> +                                      lod_fpart, bld->levelf_bld.zero);
> +         need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, need_lerp);
> +      }
>  
>        lp_build_if(&if_ctx, bld->gallivm, need_lerp);
>        {
> @@ -1036,10 +1036,11 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>  
>           /* interpolate samples from the two mipmap levels */
>  
> -         lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
> -
> bld->perquadf_bld.type,
> -
> bld->texel_bld.type,
> -                                                           lod_fpart);
> +         if (bld->num_lods != bld->coord_type.length)
> +            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
> +
> bld->levelf_bld.type,
> +
> bld->texel_bld.type,
> +                                                              lod_fpart);
>  
>           for (chan = 0; chan < 4; chan++) {
>              colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
> @@ -1143,7 +1144,7 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
>                              mip_filter,
>                              lod_ipart, lod_fpart);
>     } else {
> -      *lod_ipart = bld->perquadi_bld.zero;
> +      *lod_ipart = bld->leveli_bld.zero;
>     }
>  
>     /*
> @@ -1166,7 +1167,7 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
>        else {
>           first_level = bld->dynamic_state->first_level(bld->dynamic_state,
>                                                         bld->gallivm,
>                                                         texture_index);
> -         first_level = lp_build_broadcast_scalar(&bld->perquadi_bld,
> first_level);
> +         first_level = lp_build_broadcast_scalar(&bld->leveli_bld,
> first_level);
>           *ilevel0 = first_level;
>        }
>        break;
> @@ -1295,7 +1296,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
>                       const LLVMValueRef *offsets,
>                       LLVMValueRef *colors_out)
>  {
> -   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
> +   struct lp_build_context *perquadi_bld = &bld->leveli_bld;
>     struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
>     unsigned dims = bld->dims, chan;
>     unsigned target = bld->static_texture_state->target;
> @@ -1307,8 +1308,13 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
>  
>     /* XXX just like ordinary sampling, we don't handle per-pixel lod (yet).
>     */
>     if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
> -      ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
> -                                         perquadi_bld->type, explicit_lod,
> 0);
> +      if (bld->num_lods != int_coord_bld->type.length) {
> +         ilevel = lp_build_pack_aos_scalars(bld->gallivm,
> int_coord_bld->type,
> +                                            perquadi_bld->type,
> explicit_lod, 0);
> +      }
> +      else {
> +         ilevel = explicit_lod;
> +      }
>        lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel);
>     }
>     else {
> @@ -1489,6 +1495,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>                      const struct lp_derivatives *derivs, /* optional */
>                      LLVMValueRef lod_bias, /* optional */
>                      LLVMValueRef explicit_lod, /* optional */
> +                    boolean scalar_lod,
>                      LLVMValueRef texel_out[4])
>  {
>     unsigned dims = texture_dims(static_texture_state->target);
> @@ -1529,10 +1536,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>     bld.float_size_in_type.length = dims > 1 ? 4 : 1;
>     bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
>     bld.texel_type = type;
> -   bld.perquadf_type = type;
> -   /* we want native vector size to be able to use our intrinsics */
> -   bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
> -   bld.perquadi_type = lp_int_type(bld.perquadf_type);
>  
>     /* always using the first channel hopefully should be safe,
>      * if not things WILL break in other places anyway.
> @@ -1567,17 +1570,31 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>      * There are other situations where at least the multiple int lods could be
>      * avoided like min and max lod being equal.
>      */
> -   if ((is_fetch && explicit_lod && bld.static_texture_state->target != PIPE_BUFFER) ||
> -       (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> +   if (explicit_lod && !scalar_lod &&
> +       ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
> +        (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> +      bld.num_lods = type.length;
> +   /* TODO: for true scalar_lod should only use 1 lod value */
> +   else if (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE) {
>        bld.num_lods = num_quads;
>     }
>     else {
>        bld.num_lods = 1;
>     }
>  
> +   bld.levelf_type = type;
> +   /* we want native vector size to be able to use our intrinsics */
> +   if (bld.num_lods != type.length) {
> +      bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
> +   }
> +   bld.leveli_type = lp_int_type(bld.levelf_type);
>     bld.float_size_type = bld.float_size_in_type;
> -   bld.float_size_type.length = bld.num_lods > 1 ? type.length :
> -                                   bld.float_size_in_type.length;
> +   /* the vectors here are GIGANTIC (up to 32xf32), rely on llvm there */
> +   if (bld.num_lods > 1) {
> +      bld.float_size_type.length = bld.num_lods == type.length ?
> +                                      bld.num_lods * bld.float_size_in_type.length :
> +                                      type.length;
> +   }
>     bld.int_size_type = lp_int_type(bld.float_size_type);
>  
>     lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
> @@ -1590,8 +1607,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>     lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
>     lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
>     lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
> -   lp_build_context_init(&bld.perquadf_bld, gallivm, bld.perquadf_type);
> -   lp_build_context_init(&bld.perquadi_bld, gallivm, bld.perquadi_type);
> +   lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
> +   lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
>  
>     /* Get the dynamic state */
>     tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
> @@ -1735,14 +1752,32 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>           bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
>           bld4.texel_type = bld.texel_type;
>           bld4.texel_type.length = 4;
> -         bld4.perquadf_type = type4;
> +         bld4.levelf_type = type4;
>           /* we want native vector size to be able to use our intrinsics */
> -         bld4.perquadf_type.length = 1;
> -         bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
> +         bld4.levelf_type.length = 1;
> +         bld4.leveli_type = lp_int_type(bld4.levelf_type);
> +
> +         if (explicit_lod && !scalar_lod &&
> +             ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
> +              (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> +            bld4.num_lods = type4.length;
> +         else
> +            bld4.num_lods = 1;
>  
> -         bld4.num_lods = 1;
> -         bld4.int_size_type = bld4.int_size_in_type;
> +         bld4.levelf_type = type4;
> +         /* we want native vector size to be able to use our intrinsics */
> +         if (bld4.num_lods != type4.length) {
> +            bld4.levelf_type.length = 1;
> +         }
> +         bld4.leveli_type = lp_int_type(bld4.levelf_type);
>           bld4.float_size_type = bld4.float_size_in_type;
> +         /* the vectors here are GIGANTIC (up to 16xf32) */
> +         if (bld4.num_lods > 1) {
> +            bld4.float_size_type.length = bld4.num_lods == type4.length ?
> +                                            bld4.num_lods * bld4.float_size_in_type.length :
> +                                            type4.length;
> +         }
> +         bld4.int_size_type = lp_int_type(bld4.float_size_type);
>  
>           lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
>           lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
> @@ -1754,15 +1789,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>           lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
>           lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
>           lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
> -         lp_build_context_init(&bld4.perquadf_bld, gallivm, bld4.perquadf_type);
> -         lp_build_context_init(&bld4.perquadi_bld, gallivm, bld4.perquadi_type);
> +         lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
> +         lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
>  
>           for (i = 0; i < num_quads; i++) {
>              LLVMValueRef s4, t4, r4;
> -            LLVMValueRef lod_iparts, lod_fparts = NULL;
> -            LLVMValueRef ilevel0s, ilevel1s = NULL;
> -            LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
> +            LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
> +            LLVMValueRef ilevel04, ilevel14 = NULL;
>              LLVMValueRef offsets4[4] = { NULL };
> +            unsigned num_lods = bld4.num_lods;
>  
>              s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
>              t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
> @@ -1777,27 +1812,27 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>                    }
>                 }
>              }
> -            lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, "");
> -            ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, "");
> +            lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
> +            ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
>              if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> -               ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi, "");
> -               lod_fparts = LLVMBuildExtractElement(builder, lod_fpart, indexi, "");
> +               ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
> +               lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
>              }
>  
>              if (use_aos) {
>                 /* do sampling/filtering with fixed pt arithmetic */
>                 lp_build_sample_aos(&bld4, sampler_index,
>                                     s4, t4, r4, offsets4,
> -                                   lod_iparts, lod_fparts,
> -                                   ilevel0s, ilevel1s,
> +                                   lod_ipart4, lod_fpart4,
> +                                   ilevel04, ilevel14,
>                                     texelout4);
>              }
>  
>              else {
>                 lp_build_sample_general(&bld4, sampler_index,
>                                         s4, t4, r4, offsets4,
> -                                       lod_iparts, lod_fparts,
> -                                       ilevel0s, ilevel1s,
> +                                       lod_ipart4, lod_fpart4,
> +                                       ilevel04, ilevel14,
>                                         texelout4);
>              }
>              for (j = 0; j < 4; j++) {
> @@ -1864,6 +1899,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
>     lp_build_context_init(&bld_int_vec, gallivm, lp_type_int_vec(32, 128));
>  
>     if (explicit_lod) {
> +      /* FIXME: this needs to honor per-element lod */
>        lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
>        first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
>        lod = lp_build_broadcast_scalar(&bld_int_vec,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index fd566b1..0b48450 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -184,6 +184,7 @@ struct lp_build_sampler_soa
>                          const struct lp_derivatives *derivs,
>                          LLVMValueRef lod_bias, /* optional */
>                          LLVMValueRef explicit_lod, /* optional */
> +                        boolean scalar_lod,
>                          LLVMValueRef *texel);
>  
>     void
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 0bbc408..862be0a 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1576,6 +1576,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>     LLVMValueRef offsets[3] = { NULL };
>     struct lp_derivatives derivs;
>     struct lp_derivatives *deriv_ptr = NULL;
> +   boolean scalar_lod;
>     unsigned num_coords, num_derivs, num_offsets;
>     unsigned i;
>  
> @@ -1693,6 +1694,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>        }
>     }
>  
> +   /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
> +   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
> +
>     bld->sampler->emit_fetch_texel(bld->sampler,
>                                    bld->bld_base.base.gallivm,
>                                    bld->bld_base.base.type,
> @@ -1701,7 +1705,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>                                    coords,
>                                    offsets,
>                                    deriv_ptr,
> -                                  lod_bias, explicit_lod,
> +                                  lod_bias, explicit_lod, scalar_lod,
>                                    texel);
>  }
>  
> @@ -1719,6 +1723,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
>     LLVMValueRef offsets[3] = { NULL };
>     struct lp_derivatives derivs;
>     struct lp_derivatives *deriv_ptr = NULL;
> +   boolean scalar_lod;
>     unsigned num_coords, num_offsets, num_derivs;
>     unsigned i;
>  
> @@ -1836,6 +1841,9 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
>        }
>     }
>  
> +   /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
> +   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
> +
>     bld->sampler->emit_fetch_texel(bld->sampler,
>                                    bld->bld_base.base.gallivm,
>                                    bld->bld_base.base.type,
> @@ -1844,7 +1852,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
>                                    coords,
>                                    offsets,
>                                    deriv_ptr,
> -                                  lod_bias, explicit_lod,
> +                                  lod_bias, explicit_lod, scalar_lod,
>                                    texel);
>  }
>  
> @@ -1859,6 +1867,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
>     LLVMValueRef explicit_lod = NULL;
>     LLVMValueRef coords[3];
>     LLVMValueRef offsets[3] = { NULL };
> +   boolean scalar_lod;
>     unsigned num_coords;
>     unsigned dims;
>     unsigned i;
> @@ -1927,6 +1936,9 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
>        }
>     }
>  
> +   /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
> +   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
> +
>     bld->sampler->emit_fetch_texel(bld->sampler,
>                                    bld->bld_base.base.gallivm,
>                                    bld->bld_base.base.type,
> @@ -1935,7 +1947,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
>                                    coords,
>                                    offsets,
>                                    NULL,
> -                                  NULL, explicit_lod,
> +                                  NULL, explicit_lod, scalar_lod,
>                                    texel);
>  }
>  
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
> index 2a37b38..a473782 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
> @@ -72,6 +72,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
>            procType == TGSI_PROCESSOR_VERTEX ||
>            procType == TGSI_PROCESSOR_GEOMETRY ||
>            procType == TGSI_PROCESSOR_COMPUTE);
> +   info->processor = procType;
>  
>  
>     /**
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
> index 91eef67..b62c462 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
> @@ -54,6 +54,8 @@ struct tgsi_shader_info
>     ubyte num_system_values;
>     ubyte system_value_semantic_name[PIPE_MAX_SHADER_INPUTS];
>  
> +   ubyte processor;
> +
>     uint file_mask[TGSI_FILE_COUNT];  /**< bitmask of declared registers */
>     uint file_count[TGSI_FILE_COUNT];  /**< number of declared registers */
>     int file_max[TGSI_FILE_COUNT];  /**< highest index of declared registers */
> diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
> index df2a610..2fb6f5b 100644
> --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
> +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
> @@ -244,6 +244,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
>                                       const struct lp_derivatives *derivs,
>                                       LLVMValueRef lod_bias, /* optional */
>                                       LLVMValueRef explicit_lod, /* optional */
> +                                     boolean scalar_lod,
>                                       LLVMValueRef *texel)
>  {
>     struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
> @@ -267,7 +268,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
>                         coords,
>                         offsets,
>                         derivs,
> -                       lod_bias, explicit_lod,
> +                       lod_bias, explicit_lod, scalar_lod,
>                         texel);
>  }
>  
> --
> 1.7.9.5
>