[Mesa-dev] [PATCH] gallivm: do per-pixel lod calculations for explicit lod
Jose Fonseca
jfonseca at vmware.com
Wed Jul 3 08:28:10 PDT 2013
I don't fully grasp every detail (many paths), but looks good in principle.
Where do the 16xf32 vectors come from?
Also, please add a comment somewhere summarizing all the code paths for lod handling:
- AVX vs non AVX
- SOA vs AOS
- scalar lod vs stamp lod
But I couldn't spot anything wrong.
Jose
----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
>
> d3d10 requires per-pixel lod calculations for explicit lod, lod bias and
> explicit derivatives, and we should probably do it for OpenGL too - at least
> if they are used from vertex or geometry shaders (so this doesn't apply to
> lod bias), since there this doesn't just affect neighboring pixels.
> Some code was already there to handle this so fix it up and enable it.
> There will no doubt be a performance hit, unfortunately. We could do better
> if we knew we had a real vector shift instruction (with variable shift
> count), but this requires AVX2 on x86 (or an AMD Bulldozer family cpu).
> Don't do anything for lod bias and explicit derivatives yet, though
> no special magic should be needed for them either.
> Likewise, the size query is still broken just the same.
>
> v2: Use the information whether lod is a (broadcast) scalar or not. The idea
> would be to base this on the actual value; for now just pretend it's a scalar
> in fs and not a scalar otherwise (so per-pixel lod is only used in gs/vs, but
> the same code is generated for fs as before).
> ---
> src/gallium/auxiliary/draw/draw_llvm_sample.c | 3 +-
> src/gallium/auxiliary/gallivm/lp_bld_sample.c | 110 ++++++++---------
> src/gallium/auxiliary/gallivm/lp_bld_sample.h | 13 ++-
> src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 26 ++---
> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 130
> +++++++++++++--------
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 +
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 18 ++-
> src/gallium/auxiliary/tgsi/tgsi_scan.c | 1 +
> src/gallium/auxiliary/tgsi/tgsi_scan.h | 2 +
> src/gallium/drivers/llvmpipe/lp_tex_sample.c | 3 +-
> 10 files changed, 181 insertions(+), 126 deletions(-)
>
> diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c
> b/src/gallium/auxiliary/draw/draw_llvm_sample.c
> index e51e011..0cb5c21 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
> @@ -238,6 +238,7 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct
> lp_build_sampler_soa *base,
> const struct lp_derivatives *derivs,
> LLVMValueRef lod_bias, /* optional */
> LLVMValueRef explicit_lod, /*
> optional */
> + boolean scalar_lod,
> LLVMValueRef *texel)
> {
> struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa
> *)base;
> @@ -256,7 +257,7 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct
> lp_build_sampler_soa *base,
> coords,
> offsets,
> derivs,
> - lod_bias, explicit_lod,
> + lod_bias, explicit_lod, scalar_lod,
> texel);
> }
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index d689c7b..c2efec9 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -215,7 +215,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
> struct lp_build_context *float_bld = &bld->float_bld;
> struct lp_build_context *coord_bld = &bld->coord_bld;
> - struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
> + struct lp_build_context *levelf_bld = &bld->levelf_bld;
> const unsigned dims = bld->dims;
> LLVMValueRef ddx_ddy[2];
> LLVMBuilderRef builder = bld->gallivm->builder;
> @@ -235,6 +235,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
>
> /* Note that all simplified calculations will only work for isotropic
> filtering */
>
> + assert(bld->num_lods != length);
> +
> first_level = bld->dynamic_state->first_level(bld->dynamic_state,
> bld->gallivm,
> texture_unit);
> first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
> @@ -248,14 +250,14 @@ lp_build_rho(struct lp_build_sample_context *bld,
> * Cube map code did already everything except size mul and per-quad
> extraction.
> */
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - perquadf_bld->type, cube_rho, 0);
> + levelf_bld->type, cube_rho, 0);
> if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
> - rho = lp_build_sqrt(perquadf_bld, rho);
> + rho = lp_build_sqrt(levelf_bld, rho);
> }
> /* Could optimize this for single quad just skip the broadcast */
> cubesize = lp_build_extract_broadcast(gallivm,
> bld->float_size_in_type,
> - perquadf_bld->type, float_size,
> index0);
> - rho = lp_build_mul(perquadf_bld, cubesize, rho);
> + levelf_bld->type, float_size,
> index0);
> + rho = lp_build_mul(levelf_bld, cubesize, rho);
> }
> else if (derivs && !(bld->static_texture_state->target ==
> PIPE_TEXTURE_CUBE)) {
> LLVMValueRef ddmax[3], ddx[3], ddy[3];
> @@ -289,12 +291,12 @@ lp_build_rho(struct lp_build_sample_context *bld,
> }
> rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - perquadf_bld->type, rho_vec, 0);
> + levelf_bld->type, rho_vec, 0);
> /*
> * note that as long as we don't care about per-pixel lod could
> reduce math
> * more (at some shuffle cost), but for now only do sqrt after
> packing.
> */
> - rho = lp_build_sqrt(perquadf_bld, rho);
> + rho = lp_build_sqrt(levelf_bld, rho);
> }
> else {
> rho_vec = ddmax[0];
> @@ -309,7 +311,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> * since we can't handle per-pixel rho/lod from now on (TODO).
> */
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - perquadf_bld->type, rho_vec, 0);
> + levelf_bld->type, rho_vec, 0);
> }
> }
> else {
> @@ -381,8 +383,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
> rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
>
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - perquadf_bld->type, rho_vec, 0);
> - rho = lp_build_sqrt(perquadf_bld, rho);
> + levelf_bld->type, rho_vec, 0);
> + rho = lp_build_sqrt(levelf_bld, rho);
> }
> else {
> ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
> @@ -462,7 +464,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> }
> }
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - perquadf_bld->type, rho, 0);
> + levelf_bld->type, rho, 0);
> }
> else {
> if (dims <= 1) {
> @@ -652,11 +654,11 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>
> {
> LLVMBuilderRef builder = bld->gallivm->builder;
> - struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
> + struct lp_build_context *levelf_bld = &bld->levelf_bld;
> LLVMValueRef lod;
>
> - *out_lod_ipart = bld->perquadi_bld.zero;
> - *out_lod_fpart = perquadf_bld->zero;
> + *out_lod_ipart = bld->leveli_bld.zero;
> + *out_lod_fpart = levelf_bld->zero;
>
> if (bld->static_sampler_state->min_max_lod_equal) {
> /* User is forcing sampling from a particular mipmap level.
> @@ -666,12 +668,15 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> bld->dynamic_state->min_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
>
> - lod = lp_build_broadcast_scalar(perquadf_bld, min_lod);
> + lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
> }
> else {
> if (explicit_lod) {
> - lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
> - perquadf_bld->type, explicit_lod,
> 0);
> + if (bld->num_lods != bld->coord_type.length)
> + lod = lp_build_pack_aos_scalars(bld->gallivm,
> bld->coord_bld.type,
> + levelf_bld->type, explicit_lod,
> 0);
> + else
> + lod = explicit_lod;
> }
> else {
> LLVMValueRef rho;
> @@ -694,29 +699,29 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>
> if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
> mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
> - *out_lod_ipart = lp_build_ilog2(perquadf_bld, rho);
> - *out_lod_fpart = perquadf_bld->zero;
> + *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
> + *out_lod_fpart = levelf_bld->zero;
> return;
> }
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
> !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> - lp_build_brilinear_rho(perquadf_bld, rho, BRILINEAR_FACTOR,
> + lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
> out_lod_ipart, out_lod_fpart);
> return;
> }
> }
>
> if (0) {
> - lod = lp_build_log2(perquadf_bld, rho);
> + lod = lp_build_log2(levelf_bld, rho);
> }
> else {
> - lod = lp_build_fast_log2(perquadf_bld, rho);
> + lod = lp_build_fast_log2(levelf_bld, rho);
> }
>
> /* add shader lod bias */
> if (lod_bias) {
> lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
> bld->coord_bld.type,
> - perquadf_bld->type, lod_bias, 0);
> + levelf_bld->type, lod_bias, 0);
> lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
> }
> }
> @@ -726,7 +731,7 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> LLVMValueRef sampler_lod_bias =
> bld->dynamic_state->lod_bias(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - sampler_lod_bias = lp_build_broadcast_scalar(perquadf_bld,
> + sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
> sampler_lod_bias);
> lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias,
> "sampler_lod_bias");
> }
> @@ -736,33 +741,33 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> LLVMValueRef max_lod =
> bld->dynamic_state->max_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - max_lod = lp_build_broadcast_scalar(perquadf_bld, max_lod);
> + max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
>
> - lod = lp_build_min(perquadf_bld, lod, max_lod);
> + lod = lp_build_min(levelf_bld, lod, max_lod);
> }
> if (bld->static_sampler_state->apply_min_lod) {
> LLVMValueRef min_lod =
> bld->dynamic_state->min_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - min_lod = lp_build_broadcast_scalar(perquadf_bld, min_lod);
> + min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
>
> - lod = lp_build_max(perquadf_bld, lod, min_lod);
> + lod = lp_build_max(levelf_bld, lod, min_lod);
> }
> }
>
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> - lp_build_brilinear_lod(perquadf_bld, lod, BRILINEAR_FACTOR,
> + lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
> out_lod_ipart, out_lod_fpart);
> }
> else {
> - lp_build_ifloor_fract(perquadf_bld, lod, out_lod_ipart,
> out_lod_fpart);
> + lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart,
> out_lod_fpart);
> }
>
> lp_build_name(*out_lod_fpart, "lod_fpart");
> }
> else {
> - *out_lod_ipart = lp_build_iround(perquadf_bld, lod);
> + *out_lod_ipart = lp_build_iround(levelf_bld, lod);
> }
>
> lp_build_name(*out_lod_ipart, "lod_ipart");
> @@ -784,20 +789,20 @@ lp_build_nearest_mip_level(struct
> lp_build_sample_context *bld,
> LLVMValueRef lod_ipart,
> LLVMValueRef *level_out)
> {
> - struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
> + struct lp_build_context *leveli_bld = &bld->leveli_bld;
> LLVMValueRef first_level, last_level, level;
>
> first_level = bld->dynamic_state->first_level(bld->dynamic_state,
> bld->gallivm,
> texture_unit);
> last_level = bld->dynamic_state->last_level(bld->dynamic_state,
> bld->gallivm, texture_unit);
> - first_level = lp_build_broadcast_scalar(perquadi_bld, first_level);
> - last_level = lp_build_broadcast_scalar(perquadi_bld, last_level);
> + first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
> + last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
>
> - level = lp_build_add(perquadi_bld, lod_ipart, first_level);
> + level = lp_build_add(leveli_bld, lod_ipart, first_level);
>
> /* clamp level to legal range of levels */
> - *level_out = lp_build_clamp(perquadi_bld, level, first_level,
> last_level);
> + *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
> }
>
>
> @@ -815,8 +820,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
> LLVMValueRef *level1_out)
> {
> LLVMBuilderRef builder = bld->gallivm->builder;
> - struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
> - struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
> + struct lp_build_context *leveli_bld = &bld->leveli_bld;
> + struct lp_build_context *levelf_bld = &bld->levelf_bld;
> LLVMValueRef first_level, last_level;
> LLVMValueRef clamp_min;
> LLVMValueRef clamp_max;
> @@ -825,11 +830,11 @@ lp_build_linear_mip_levels(struct
> lp_build_sample_context *bld,
> bld->gallivm,
> texture_unit);
> last_level = bld->dynamic_state->last_level(bld->dynamic_state,
> bld->gallivm, texture_unit);
> - first_level = lp_build_broadcast_scalar(perquadi_bld, first_level);
> - last_level = lp_build_broadcast_scalar(perquadi_bld, last_level);
> + first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
> + last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
>
> - *level0_out = lp_build_add(perquadi_bld, lod_ipart, first_level);
> - *level1_out = lp_build_add(perquadi_bld, *level0_out, perquadi_bld->one);
> + *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
> + *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);
>
> /*
> * Clamp both *level0_out and *level1_out to [first_level, last_level],
> with
> @@ -843,7 +848,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
> * converting to our lp_bld_logic helpers.
> */
> #if HAVE_LLVM < 0x0301
> - assert(perquadi_bld->type.length == 1);
> + assert(leveli_bld->type.length == 1);
> #endif
>
> /* *level0_out < first_level */
> @@ -858,7 +863,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
> first_level, *level1_out, "");
>
> *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
> - perquadf_bld->zero, *lod_fpart_inout,
> "");
> + levelf_bld->zero, *lod_fpart_inout,
> "");
>
> /* *level0_out >= last_level */
> clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
> @@ -872,7 +877,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
> last_level, *level1_out, "");
>
> *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
> - perquadf_bld->zero, *lod_fpart_inout,
> "");
> + levelf_bld->zero, *lod_fpart_inout,
> "");
>
> lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
> lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
> @@ -1087,7 +1092,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
>
> ileveli = lp_build_extract_broadcast(bld->gallivm,
> - bld->perquadi_bld.type,
> + bld->leveli_bld.type,
> bld4.type,
> ilevel,
> indexi);
> @@ -1131,10 +1136,9 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> tmp[i] = bld->int_size;
> tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i],
> ilevel1);
> }
> - int_size_vec = lp_build_concat(bld->gallivm,
> - tmp,
> - bld->int_size_in_bld.type,
> - bld->num_lods);
> + *out_size = lp_build_concat(bld->gallivm, tmp,
> + bld->int_size_in_bld.type,
> + bld->num_lods);
> }
> }
> }
> @@ -1218,10 +1222,10 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
> *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
> coord_type, size, 0);
> if (dims >= 2) {
> - *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
> - coord_type, size, 1);
> + *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
> + coord_type, size, 1);
> if (dims == 3) {
> - *out_width = lp_build_pack_aos_scalars(bld->gallivm,
> size_type,
> + *out_depth = lp_build_pack_aos_scalars(bld->gallivm,
> size_type,
> coord_type, size, 2);
> }
> }
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index cde8ce9..a3ecc05 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -268,13 +268,13 @@ struct lp_build_sample_context
> struct lp_type texel_type;
> struct lp_build_context texel_bld;
>
> - /** Float per-quad type */
> - struct lp_type perquadf_type;
> - struct lp_build_context perquadf_bld;
> + /** Float level type */
> + struct lp_type levelf_type;
> + struct lp_build_context levelf_bld;
>
> - /** Int per-quad type */
> - struct lp_type perquadi_type;
> - struct lp_build_context perquadi_bld;
> + /** Int level type */
> + struct lp_type leveli_type;
> + struct lp_build_context leveli_bld;
>
> /* Common dynamic state values */
> LLVMValueRef row_stride_array;
> @@ -477,6 +477,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> const struct lp_derivatives *derivs,
> LLVMValueRef lod_bias,
> LLVMValueRef explicit_lod,
> + boolean scalar_lod,
> LLVMValueRef texel_out[4]);
>
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> index 104c24d..da416aa 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> @@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
> - bld->perquadf_bld.type,
> 256.0);
> - LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm,
> bld->perquadi_bld.type);
> + bld->levelf_bld.type,
> 256.0);
> + LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
> struct lp_build_if_state if_ctx;
> LLVMValueRef need_lerp;
> unsigned num_quads = bld->coord_bld.type.length / 4;
> @@ -1433,9 +1433,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type,
> "lod_fpart.fixed16");
>
> /* need_lerp = lod_fpart > 0 */
> - if (num_quads == 1) {
> + if (bld->num_lods == 1) {
> need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
> - lod_fpart, bld->perquadi_bld.zero,
> + lod_fpart, bld->leveli_bld.zero,
> "need_lerp");
> }
> else {
> @@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> * lod_fpart values have same sign.
> * We can however then skip the greater than comparison.
> */
> - lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
> - bld->perquadi_bld.zero);
> - need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads,
> lod_fpart);
> + lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
> + bld->leveli_bld.zero);
> + need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, lod_fpart);
> }
>
> lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> @@ -1465,9 +1465,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel1,
> &size1,
> &row_stride1_vec, &img_stride1_vec);
> - lp_build_mipmap_level_sizes(bld, ilevel1,
> - &size1,
> - &row_stride1_vec, &img_stride1_vec);
> if (bld->num_lods == 1) {
> data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
> }
> @@ -1511,7 +1508,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>
> /* interpolate samples from the two mipmap levels */
>
> - if (num_quads == 1) {
> + if (num_quads == 1 && bld->num_lods == 1) {
> lod_fpart = LLVMBuildTrunc(builder, lod_fpart,
> u8n_bld.elem_type, "");
> lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
>
> @@ -1526,17 +1523,16 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> #endif
> }
> else {
> - const unsigned num_chans_per_quad = 4 * 4;
> - LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->perquadi_bld.type.length);
> + unsigned num_chans_per_lod = 4 * bld->coord_type.length /
> bld->num_lods;
> + LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->leveli_bld.type.length);
> LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
>
> /* Take the LSB of lod_fpart */
> lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type,
> "");
>
> /* Broadcast each lod weight into their respective channels */
> - assert(u8n_bld.type.length == num_quads * num_chans_per_quad);
> for (i = 0; i < u8n_bld.type.length; ++i) {
> - shuffle[i] = lp_build_const_int32(bld->gallivm, i /
> num_chans_per_quad);
> + shuffle[i] = lp_build_const_int32(bld->gallivm, i /
> num_chans_per_lod);
> }
> lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart,
> LLVMGetUndef(tmp_vec_type),
> LLVMConstVector(shuffle,
> u8n_bld.type.length), "");
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index e0a59d0..07fa47e 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -979,17 +979,17 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> struct lp_build_if_state if_ctx;
> LLVMValueRef need_lerp;
> - unsigned num_quads = bld->coord_bld.type.length / 4;
>
> /* need_lerp = lod_fpart > 0 */
> - if (num_quads == 1) {
> + if (bld->num_lods == 1) {
> need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
> - lod_fpart, bld->perquadf_bld.zero,
> + lod_fpart, bld->levelf_bld.zero,
> "need_lerp");
> }
> else {
> /*
> - * We'll do mip filtering if any of the quads need it.
> + * We'll do mip filtering if any of the quads (or individual
> + * pixel in case of per-pixel lod) need it.
> * It might be better to split the vectors here and only
> fetch/filter
> * quads which need it.
> */
> @@ -998,13 +998,13 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> * negative values which would screw up filtering if not all
> * lod_fpart values have same sign.
> */
> - lod_fpart = lp_build_max(&bld->perquadf_bld, lod_fpart,
> - bld->perquadf_bld.zero);
> - need_lerp = lp_build_compare(bld->gallivm, bld->perquadf_bld.type,
> + lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
> + bld->levelf_bld.zero);
> + need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
> PIPE_FUNC_GREATER,
> - lod_fpart, bld->perquadf_bld.zero);
> - need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads,
> need_lerp);
> - }
> + lod_fpart, bld->levelf_bld.zero);
> + need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, need_lerp);
> + }
>
> lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> {
> @@ -1036,10 +1036,11 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>
> /* interpolate samples from the two mipmap levels */
>
> - lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
> -
> bld->perquadf_bld.type,
> -
> bld->texel_bld.type,
> - lod_fpart);
> + if (bld->num_lods != bld->coord_type.length)
> + lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
> +
> bld->levelf_bld.type,
> +
> bld->texel_bld.type,
> + lod_fpart);
>
> for (chan = 0; chan < 4; chan++) {
> colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
> @@ -1143,7 +1144,7 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
> mip_filter,
> lod_ipart, lod_fpart);
> } else {
> - *lod_ipart = bld->perquadi_bld.zero;
> + *lod_ipart = bld->leveli_bld.zero;
> }
>
> /*
> @@ -1166,7 +1167,7 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
> else {
> first_level = bld->dynamic_state->first_level(bld->dynamic_state,
> bld->gallivm,
> texture_index);
> - first_level = lp_build_broadcast_scalar(&bld->perquadi_bld,
> first_level);
> + first_level = lp_build_broadcast_scalar(&bld->leveli_bld,
> first_level);
> *ilevel0 = first_level;
> }
> break;
> @@ -1295,7 +1296,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
> const LLVMValueRef *offsets,
> LLVMValueRef *colors_out)
> {
> - struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
> + struct lp_build_context *perquadi_bld = &bld->leveli_bld;
> struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
> unsigned dims = bld->dims, chan;
> unsigned target = bld->static_texture_state->target;
> @@ -1307,8 +1308,13 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
>
> /* XXX just like ordinary sampling, we don't handle per-pixel lod (yet).
> */
> if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
> - ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
> - perquadi_bld->type, explicit_lod,
> 0);
> + if (bld->num_lods != int_coord_bld->type.length) {
> + ilevel = lp_build_pack_aos_scalars(bld->gallivm,
> int_coord_bld->type,
> + perquadi_bld->type,
> explicit_lod, 0);
> + }
> + else {
> + ilevel = explicit_lod;
> + }
> lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel);
> }
> else {
> @@ -1489,6 +1495,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> const struct lp_derivatives *derivs, /* optional */
> LLVMValueRef lod_bias, /* optional */
> LLVMValueRef explicit_lod, /* optional */
> + boolean scalar_lod,
> LLVMValueRef texel_out[4])
> {
> unsigned dims = texture_dims(static_texture_state->target);
> @@ -1529,10 +1536,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> bld.float_size_in_type.length = dims > 1 ? 4 : 1;
> bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
> bld.texel_type = type;
> - bld.perquadf_type = type;
> - /* we want native vector size to be able to use our intrinsics */
> - bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) *
> 4 : 1;
> - bld.perquadi_type = lp_int_type(bld.perquadf_type);
>
> /* always using the first channel hopefully should be safe,
> * if not things WILL break in other places anyway.
> @@ -1567,17 +1570,31 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> * There are other situations where at least the multiple int lods could
> be
> * avoided like min and max lod being equal.
> */
> - if ((is_fetch && explicit_lod && bld.static_texture_state->target !=
> PIPE_BUFFER) ||
> - (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + if (explicit_lod && !scalar_lod &&
> + ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> + bld.num_lods = type.length;
> + /* TODO: for true scalar_lod should only use 1 lod value */
> + else if (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE) {
> bld.num_lods = num_quads;
> }
> else {
> bld.num_lods = 1;
> }
>
> + bld.levelf_type = type;
> + /* we want native vector size to be able to use our intrinsics */
> + if (bld.num_lods != type.length) {
> + bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) *
> 4 : 1;
> + }
> + bld.leveli_type = lp_int_type(bld.levelf_type);
> bld.float_size_type = bld.float_size_in_type;
> - bld.float_size_type.length = bld.num_lods > 1 ? type.length :
> - bld.float_size_in_type.length;
> + /* the vectors here are GIGANTIC (up to 32xf32), rely on llvm there */
> + if (bld.num_lods > 1) {
> + bld.float_size_type.length = bld.num_lods == type.length ?
> + bld.num_lods *
> bld.float_size_in_type.length :
> + type.length;
> + }
> bld.int_size_type = lp_int_type(bld.float_size_type);
>
> lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
> @@ -1590,8 +1607,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
> lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
> lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
> - lp_build_context_init(&bld.perquadf_bld, gallivm, bld.perquadf_type);
> - lp_build_context_init(&bld.perquadi_bld, gallivm, bld.perquadi_type);
> + lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
> + lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
>
> /* Get the dynamic state */
> tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
> @@ -1735,14 +1752,32 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
> bld4.texel_type = bld.texel_type;
> bld4.texel_type.length = 4;
> - bld4.perquadf_type = type4;
> + bld4.levelf_type = type4;
> /* we want native vector size to be able to use our intrinsics */
> - bld4.perquadf_type.length = 1;
> - bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
> + bld4.levelf_type.length = 1;
> + bld4.leveli_type = lp_int_type(bld4.levelf_type);
> +
> + if (explicit_lod && !scalar_lod &&
> + ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER)
> ||
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> + bld4.num_lods = type4.length;
> + else
> + bld4.num_lods = 1;
>
> - bld4.num_lods = 1;
> - bld4.int_size_type = bld4.int_size_in_type;
> + bld4.levelf_type = type4;
> + /* we want native vector size to be able to use our intrinsics */
> + if (bld4.num_lods != type4.length) {
> + bld4.levelf_type.length = 1;
> + }
> + bld4.leveli_type = lp_int_type(bld4.levelf_type);
> bld4.float_size_type = bld4.float_size_in_type;
> + /* the vectors here are GIGANTIC (up to 16xf32) */
> + if (bld4.num_lods > 1) {
> + bld4.float_size_type.length = bld4.num_lods == type4.length ?
> + bld4.num_lods *
> bld4.float_size_in_type.length :
> + type4.length;
> + }
> + bld4.int_size_type = lp_int_type(bld4.float_size_type);
>
> lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
> lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
> @@ -1754,15 +1789,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld4.int_size_bld, gallivm,
> bld4.int_size_type);
> lp_build_context_init(&bld4.float_size_bld, gallivm,
> bld4.float_size_type);
> lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
> - lp_build_context_init(&bld4.perquadf_bld, gallivm,
> bld4.perquadf_type);
> - lp_build_context_init(&bld4.perquadi_bld, gallivm,
> bld4.perquadi_type);
> + lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
> + lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
>
> for (i = 0; i < num_quads; i++) {
> LLVMValueRef s4, t4, r4;
> - LLVMValueRef lod_iparts, lod_fparts = NULL;
> - LLVMValueRef ilevel0s, ilevel1s = NULL;
> - LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
> + LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
> + LLVMValueRef ilevel04, ilevel14 = NULL;
> LLVMValueRef offsets4[4] = { NULL };
> + unsigned num_lods = bld4.num_lods;
>
> s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
> t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
> @@ -1777,27 +1812,27 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> }
> }
> }
> - lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi,
> "");
> - ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi,
> "");
> + lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods
> * i, num_lods);
> + ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods *
> i, num_lods);
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> - ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi,
> "");
> - lod_fparts = LLVMBuildExtractElement(builder, lod_fpart,
> indexi, "");
> + ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods
> * i, num_lods);
> + lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart,
> num_lods * i, num_lods);
> }
>
> if (use_aos) {
> /* do sampling/filtering with fixed pt arithmetic */
> lp_build_sample_aos(&bld4, sampler_index,
> s4, t4, r4, offsets4,
> - lod_iparts, lod_fparts,
> - ilevel0s, ilevel1s,
> + lod_ipart4, lod_fpart4,
> + ilevel04, ilevel14,
> texelout4);
> }
>
> else {
> lp_build_sample_general(&bld4, sampler_index,
> s4, t4, r4, offsets4,
> - lod_iparts, lod_fparts,
> - ilevel0s, ilevel1s,
> + lod_ipart4, lod_fpart4,
> + ilevel04, ilevel14,
> texelout4);
> }
> for (j = 0; j < 4; j++) {
> @@ -1864,6 +1899,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld_int_vec, gallivm, lp_type_int_vec(32, 128));
>
> if (explicit_lod) {
> + /* FIXME: this needs to honor per-element lod */
> lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod,
> lp_build_const_int32(gallivm, 0), "");
> first_level = dynamic_state->first_level(dynamic_state, gallivm,
> texture_unit);
> lod = lp_build_broadcast_scalar(&bld_int_vec,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index fd566b1..0b48450 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -184,6 +184,7 @@ struct lp_build_sampler_soa
> const struct lp_derivatives *derivs,
> LLVMValueRef lod_bias, /* optional */
> LLVMValueRef explicit_lod, /* optional */
> + boolean scalar_lod,
> LLVMValueRef *texel);
>
> void
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 0bbc408..862be0a 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1576,6 +1576,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
> LLVMValueRef offsets[3] = { NULL };
> struct lp_derivatives derivs;
> struct lp_derivatives *deriv_ptr = NULL;
> + boolean scalar_lod;
> unsigned num_coords, num_derivs, num_offsets;
> unsigned i;
>
> @@ -1693,6 +1694,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
> }
> }
>
> + /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
> + scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
> +
> bld->sampler->emit_fetch_texel(bld->sampler,
> bld->bld_base.base.gallivm,
> bld->bld_base.base.type,
> @@ -1701,7 +1705,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
> coords,
> offsets,
> deriv_ptr,
> - lod_bias, explicit_lod,
> + lod_bias, explicit_lod, scalar_lod,
> texel);
> }
>
> @@ -1719,6 +1723,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
> LLVMValueRef offsets[3] = { NULL };
> struct lp_derivatives derivs;
> struct lp_derivatives *deriv_ptr = NULL;
> + boolean scalar_lod;
> unsigned num_coords, num_offsets, num_derivs;
> unsigned i;
>
> @@ -1836,6 +1841,9 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
> }
> }
>
> + /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
> + scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
> +
> bld->sampler->emit_fetch_texel(bld->sampler,
> bld->bld_base.base.gallivm,
> bld->bld_base.base.type,
> @@ -1844,7 +1852,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
> coords,
> offsets,
> deriv_ptr,
> - lod_bias, explicit_lod,
> + lod_bias, explicit_lod, scalar_lod,
> texel);
> }
>
> @@ -1859,6 +1867,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
> LLVMValueRef explicit_lod = NULL;
> LLVMValueRef coords[3];
> LLVMValueRef offsets[3] = { NULL };
> + boolean scalar_lod;
> unsigned num_coords;
> unsigned dims;
> unsigned i;
> @@ -1927,6 +1936,9 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
> }
> }
>
> + /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
> + scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
> +
> bld->sampler->emit_fetch_texel(bld->sampler,
> bld->bld_base.base.gallivm,
> bld->bld_base.base.type,
> @@ -1935,7 +1947,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
> coords,
> offsets,
> NULL,
> - NULL, explicit_lod,
> + NULL, explicit_lod, scalar_lod,
> texel);
> }
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
> index 2a37b38..a473782 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
> @@ -72,6 +72,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
> procType == TGSI_PROCESSOR_VERTEX ||
> procType == TGSI_PROCESSOR_GEOMETRY ||
> procType == TGSI_PROCESSOR_COMPUTE);
> + info->processor = procType;
>
>
> /**
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
> index 91eef67..b62c462 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
> @@ -54,6 +54,8 @@ struct tgsi_shader_info
> ubyte num_system_values;
> ubyte system_value_semantic_name[PIPE_MAX_SHADER_INPUTS];
>
> + ubyte processor;
> +
> uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */
> uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */
> int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */
> diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
> index df2a610..2fb6f5b 100644
> --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
> +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
> @@ -244,6 +244,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
> const struct lp_derivatives *derivs,
> LLVMValueRef lod_bias, /* optional */
> LLVMValueRef explicit_lod, /* optional */
> + boolean scalar_lod,
> LLVMValueRef *texel)
> {
> struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
> @@ -267,7 +268,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
> coords,
> offsets,
> derivs,
> - lod_bias, explicit_lod,
> + lod_bias, explicit_lod, scalar_lod,
> texel);
> }
>
> --
> 1.7.9.5
>
More information about the mesa-dev
mailing list