[Mesa-dev] [PATCH 1/2] gallivm: refactor num_lods handling
Jose Fonseca
jfonseca at vmware.com
Wed Aug 28 13:59:14 PDT 2013
LGTM.
Jose
----- Original Message -----
> From: Roland Scheidegger <sroland at vmware.com>
>
> This is just preparation for a per-pixel (or per-quad in case of multiple
> quads) min/mag filter, since some assumptions about the number of miplevels
> being equal to the number of lods no longer hold true.
> This change does not alter behavior yet. (Theoretically, when forcing the
> per-element path, it might be slower with different min/mag filters, since
> the code now respects this setting even when there are no mipmaps. In that
> case some lod calculations are done per element, even though ultimately the
> same filter is still used for all pixels.)
> ---
> src/gallium/auxiliary/gallivm/lp_bld_sample.c | 126 +++++++++---------
> src/gallium/auxiliary/gallivm/lp_bld_sample.h | 13 +-
> src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 20 +--
> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 141
> ++++++++++++---------
> 4 files changed, 169 insertions(+), 131 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index 89d7249..e1cfd78 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -217,7 +217,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
> struct lp_build_context *float_bld = &bld->float_bld;
> struct lp_build_context *coord_bld = &bld->coord_bld;
> - struct lp_build_context *levelf_bld = &bld->levelf_bld;
> + struct lp_build_context *rho_bld = &bld->lodf_bld;
> const unsigned dims = bld->dims;
> LLVMValueRef ddx_ddy[2];
> LLVMBuilderRef builder = bld->gallivm->builder;
> @@ -231,7 +231,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> LLVMValueRef first_level, first_level_vec;
> unsigned length = coord_bld->type.length;
> unsigned num_quads = length / 4;
> - boolean rho_per_quad = levelf_bld->type.length != length;
> + boolean rho_per_quad = rho_bld->type.length != length;
> unsigned i;
> LLVMValueRef i32undef =
> LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
> LLVMValueRef rho_xvec, rho_yvec;
> @@ -259,18 +259,18 @@ lp_build_rho(struct lp_build_sample_context *bld,
> */
> if (rho_per_quad) {
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - levelf_bld->type, cube_rho, 0);
> + rho_bld->type, cube_rho, 0);
> }
> else {
> rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
> }
> if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
> - rho = lp_build_sqrt(levelf_bld, rho);
> + rho = lp_build_sqrt(rho_bld, rho);
> }
> /* Could optimize this for single quad just skip the broadcast */
> cubesize = lp_build_extract_broadcast(gallivm,
> bld->float_size_in_type,
> - levelf_bld->type, float_size,
> index0);
> - rho = lp_build_mul(levelf_bld, cubesize, rho);
> + rho_bld->type, float_size,
> index0);
> + rho = lp_build_mul(rho_bld, cubesize, rho);
> }
> else if (derivs && !(bld->static_texture_state->target ==
> PIPE_TEXTURE_CUBE)) {
> LLVMValueRef ddmax[3], ddx[3], ddy[3];
> @@ -311,9 +311,9 @@ lp_build_rho(struct lp_build_sample_context *bld,
> * otherwise would also need different code to per-pixel lod
> case.
> */
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - levelf_bld->type, rho, 0);
> + rho_bld->type, rho, 0);
> }
> - rho = lp_build_sqrt(levelf_bld, rho);
> + rho = lp_build_sqrt(rho_bld, rho);
>
> }
> else {
> @@ -329,7 +329,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> * rho_vec contains per-pixel rho, convert to scalar per quad.
> */
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - levelf_bld->type, rho, 0);
> + rho_bld->type, rho, 0);
> }
> }
> }
> @@ -404,7 +404,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>
> if (rho_per_quad) {
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - levelf_bld->type, rho, 0);
> + rho_bld->type, rho, 0);
> }
> else {
> /*
> @@ -416,7 +416,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> */
> rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
> }
> - rho = lp_build_sqrt(levelf_bld, rho);
> + rho = lp_build_sqrt(rho_bld, rho);
> }
> else {
> ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
> @@ -497,7 +497,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> }
> if (rho_per_quad) {
> rho = lp_build_pack_aos_scalars(bld->gallivm,
> coord_bld->type,
> - levelf_bld->type, rho, 0);
> + rho_bld->type, rho, 0);
> }
> else {
> rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
> @@ -528,7 +528,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> }
> }
> if (!rho_per_quad) {
> - rho = lp_build_broadcast_scalar(levelf_bld, rho);
> + rho = lp_build_broadcast_scalar(rho_bld, rho);
> }
> }
> }
> @@ -675,8 +675,7 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
> * \param out_lod_fpart float part of lod (never larger than 1 but may be
> negative)
> * \param out_lod_positive (mask) if lod is positive (i.e. texture is
> minified)
> *
> - * The resulting lod is scalar per quad, so only the first value per quad
> - * passed in from lod_bias, explicit_lod is used.
> + * The resulting lod can be scalar per quad or be per element.
> */
> void
> lp_build_lod_selector(struct lp_build_sample_context *bld,
> @@ -696,12 +695,12 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>
> {
> LLVMBuilderRef builder = bld->gallivm->builder;
> - struct lp_build_context *levelf_bld = &bld->levelf_bld;
> + struct lp_build_context *lodf_bld = &bld->lodf_bld;
> LLVMValueRef lod;
>
> - *out_lod_ipart = bld->leveli_bld.zero;
> - *out_lod_positive = bld->leveli_bld.zero;
> - *out_lod_fpart = levelf_bld->zero;
> + *out_lod_ipart = bld->lodi_bld.zero;
> + *out_lod_positive = bld->lodi_bld.zero;
> + *out_lod_fpart = lodf_bld->zero;
>
> /*
> * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture
> Magnification:
> @@ -729,13 +728,13 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> bld->dynamic_state->min_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
>
> - lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
> + lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
> }
> else {
> if (explicit_lod) {
> if (bld->num_lods != bld->coord_type.length)
> lod = lp_build_pack_aos_scalars(bld->gallivm,
> bld->coord_bld.type,
> - levelf_bld->type, explicit_lod,
> 0);
> + lodf_bld->type, explicit_lod,
> 0);
> else
> lod = explicit_lod;
> }
> @@ -764,33 +763,33 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> * Don't actually need both all the time, ipart is needed
> * for nearest mipfilter, pos_or_zero if min != mag.
> */
> - *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
> - *out_lod_positive = lp_build_cmp(levelf_bld,
> PIPE_FUNC_GREATER,
> - rho, levelf_bld->one);
> + *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
> + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> + rho, lodf_bld->one);
> return;
> }
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
> !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> - lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
> + lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
> out_lod_ipart, out_lod_fpart);
> - *out_lod_positive = lp_build_cmp(levelf_bld,
> PIPE_FUNC_GREATER,
> - rho, levelf_bld->one);
> + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> + rho, lodf_bld->one);
> return;
> }
> }
>
> if (0) {
> - lod = lp_build_log2(levelf_bld, rho);
> + lod = lp_build_log2(lodf_bld, rho);
> }
> else {
> - lod = lp_build_fast_log2(levelf_bld, rho);
> + lod = lp_build_fast_log2(lodf_bld, rho);
> }
>
> /* add shader lod bias */
> if (lod_bias) {
> if (bld->num_lods != bld->coord_type.length)
> lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
> bld->coord_bld.type,
> - levelf_bld->type,
> lod_bias, 0);
> + lodf_bld->type,
> lod_bias, 0);
> lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
> }
> }
> @@ -800,7 +799,7 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> LLVMValueRef sampler_lod_bias =
> bld->dynamic_state->lod_bias(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
> + sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
> sampler_lod_bias);
> lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias,
> "sampler_lod_bias");
> }
> @@ -810,36 +809,36 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> LLVMValueRef max_lod =
> bld->dynamic_state->max_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
> + max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
>
> - lod = lp_build_min(levelf_bld, lod, max_lod);
> + lod = lp_build_min(lodf_bld, lod, max_lod);
> }
> if (bld->static_sampler_state->apply_min_lod) {
> LLVMValueRef min_lod =
> bld->dynamic_state->min_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
> + min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
>
> - lod = lp_build_max(levelf_bld, lod, min_lod);
> + lod = lp_build_max(lodf_bld, lod, min_lod);
> }
> }
>
> - *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
> - lod, levelf_bld->zero);
> + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> + lod, lodf_bld->zero);
>
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> - lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
> + lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
> out_lod_ipart, out_lod_fpart);
> }
> else {
> - lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart,
> out_lod_fpart);
> + lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
> }
>
> lp_build_name(*out_lod_fpart, "lod_fpart");
> }
> else {
> - *out_lod_ipart = lp_build_iround(levelf_bld, lod);
> + *out_lod_ipart = lp_build_iround(lodf_bld, lod);
> }
>
> lp_build_name(*out_lod_ipart, "lod_ipart");
> @@ -880,14 +879,14 @@ lp_build_nearest_mip_level(struct
> lp_build_sample_context *bld,
> out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
> out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
> out = lp_build_or(leveli_bld, out, out1);
> - if (bld->num_lods == bld->coord_bld.type.length) {
> + if (bld->num_mips == bld->coord_bld.type.length) {
> *out_of_bounds = out;
> }
> - else if (bld->num_lods == 1) {
> + else if (bld->num_mips == 1) {
> *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld,
> out);
> }
> else {
> - assert(bld->num_lods == bld->coord_bld.type.length / 4);
> + assert(bld->num_mips == bld->coord_bld.type.length / 4);
> *out_of_bounds =
> lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
> leveli_bld->type,
> bld->int_coord_bld.type,
> @@ -904,8 +903,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context
> *bld,
>
>
> /**
> - * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two
> (per-quad)
> - * (adjacent) mipmap level indexes, and fix up float lod part accordingly.
> + * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int
> LOD(s)
> + * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
> + * part accordingly.
> * Later, we'll sample from those two mipmap levels and interpolate between
> them.
> */
> void
> @@ -923,6 +923,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
> LLVMValueRef clamp_min;
> LLVMValueRef clamp_max;
>
> + assert(bld->num_lods == bld->num_mips);
> +
> first_level = bld->dynamic_state->first_level(bld->dynamic_state,
> bld->gallivm,
> texture_unit);
> last_level = bld->dynamic_state->last_level(bld->dynamic_state,
> @@ -1013,17 +1015,17 @@ lp_build_get_mip_offsets(struct
> lp_build_sample_context *bld,
> LLVMValueRef indexes[2], offsets, offset1;
>
> indexes[0] = lp_build_const_int32(bld->gallivm, 0);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> indexes[1] = level;
> offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
> offset1 = LLVMBuildLoad(builder, offset1, "");
> offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
> }
> - else if (bld->num_lods == bld->coord_bld.type.length / 4) {
> + else if (bld->num_mips == bld->coord_bld.type.length / 4) {
> unsigned i;
>
> offsets = bld->int_coord_bld.undef;
> - for (i = 0; i < bld->num_lods; i++) {
> + for (i = 0; i < bld->num_mips; i++) {
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
> LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
> indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
> @@ -1036,10 +1038,10 @@ lp_build_get_mip_offsets(struct
> lp_build_sample_context *bld,
> else {
> unsigned i;
>
> - assert (bld->num_lods == bld->coord_bld.type.length);
> + assert (bld->num_mips == bld->coord_bld.type.length);
>
> offsets = bld->int_coord_bld.undef;
> - for (i = 0; i < bld->num_lods; i++) {
> + for (i = 0; i < bld->num_mips; i++) {
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
> indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
> offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
> @@ -1089,18 +1091,18 @@ lp_build_get_level_stride_vec(struct
> lp_build_sample_context *bld,
> LLVMBuilderRef builder = bld->gallivm->builder;
> LLVMValueRef indexes[2], stride, stride1;
> indexes[0] = lp_build_const_int32(bld->gallivm, 0);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> indexes[1] = level;
> stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
> stride1 = LLVMBuildLoad(builder, stride1, "");
> stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
> }
> - else if (bld->num_lods == bld->coord_bld.type.length / 4) {
> + else if (bld->num_mips == bld->coord_bld.type.length / 4) {
> LLVMValueRef stride1;
> unsigned i;
>
> stride = bld->int_coord_bld.undef;
> - for (i = 0; i < bld->num_lods; i++) {
> + for (i = 0; i < bld->num_mips; i++) {
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
> LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
> indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
> @@ -1114,7 +1116,7 @@ lp_build_get_level_stride_vec(struct
> lp_build_sample_context *bld,
> LLVMValueRef stride1;
> unsigned i;
>
> - assert (bld->num_lods == bld->coord_bld.type.length);
> + assert (bld->num_mips == bld->coord_bld.type.length);
>
> stride = bld->int_coord_bld.undef;
> for (i = 0; i < bld->coord_bld.type.length; i++) {
> @@ -1147,7 +1149,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> /*
> * Compute width, height, depth at mipmap level 'ilevel'
> */
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
> *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size,
> ilevel_vec);
> }
> @@ -1157,7 +1159,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> unsigned num_quads = bld->coord_bld.type.length / 4;
> unsigned i;
>
> - if (bld->num_lods == num_quads) {
> + if (bld->num_mips == num_quads) {
> /*
> * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
> * intel "forgot" the variable shift count instruction until avx2.
> @@ -1216,7 +1218,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
> * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...]
> vector.
> */
> - assert(bld->num_lods == bld->coord_bld.type.length);
> + assert(bld->num_mips == bld->coord_bld.type.length);
> if (bld->dims == 1) {
> assert(bld->int_size_in_bld.type.length == 1);
> int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
> @@ -1226,7 +1228,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> }
> else {
> LLVMValueRef ilevel1;
> - for (i = 0; i < bld->num_lods; i++) {
> + for (i = 0; i < bld->num_mips; i++) {
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
> ilevel1 = lp_build_extract_broadcast(bld->gallivm,
> bld->int_coord_type,
> bld->int_size_in_bld.type,
> ilevel, indexi);
> @@ -1235,7 +1237,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> }
> *out_size = lp_build_concat(bld->gallivm, tmp,
> bld->int_size_in_bld.type,
> - bld->num_lods);
> + bld->num_mips);
> }
> }
> }
> @@ -1278,7 +1280,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
> LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
> struct lp_type size_type = size_bld->type;
>
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> *out_width = lp_build_extract_broadcast(bld->gallivm,
> size_type,
> coord_type,
> @@ -1305,7 +1307,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
> if (dims == 1) {
> *out_width = size;
> }
> - else if (bld->num_lods == num_quads) {
> + else if (bld->num_mips == num_quads) {
> *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
> if (dims >= 2) {
> *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
> @@ -1315,7 +1317,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
> }
> }
> else {
> - assert(bld->num_lods == bld->coord_type.length);
> + assert(bld->num_mips == bld->coord_type.length);
> *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
> coord_type, size, 0);
> if (dims >= 2) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index a7ebe7e..e6b9f30 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -233,7 +233,10 @@ struct lp_build_sample_context
> /** SIMD vector width */
> unsigned vector_width;
>
> - /** number of lod values (valid are 1, length/4, length) */
> + /** number of mipmaps (valid are 1, length/4, length) */
> + unsigned num_mips;
> +
> + /** number of lod values (valid are 1, length/4, length) */
> unsigned num_lods;
>
> /** regular scalar float type */
> @@ -283,6 +286,14 @@ struct lp_build_sample_context
> struct lp_type leveli_type;
> struct lp_build_context leveli_bld;
>
> + /** Float lod type */
> + struct lp_type lodf_type;
> + struct lp_build_context lodf_bld;
> +
> + /** Int lod type */
> + struct lp_type lodi_type;
> + struct lp_build_context lodi_bld;
> +
> /* Common dynamic state values */
> LLVMValueRef row_stride_array;
> LLVMValueRef img_stride_array;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> index 7431388..c35b628 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> @@ -1373,7 +1373,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel0,
> &size0,
> &row_stride0_vec, &img_stride0_vec);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
> }
> else {
> @@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
> - bld->levelf_bld.type,
> 256.0);
> - LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
> + bld->lodf_bld.type,
> 256.0);
> + LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
> struct lp_build_if_state if_ctx;
> LLVMValueRef need_lerp;
> unsigned num_quads = bld->coord_bld.type.length / 4;
> @@ -1435,7 +1435,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> /* need_lerp = lod_fpart > 0 */
> if (bld->num_lods == 1) {
> need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
> - lod_fpart, bld->leveli_bld.zero,
> + lod_fpart, bld->lodi_bld.zero,
> "need_lerp");
> }
> else {
> @@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> * lod_fpart values have same sign.
> * We can however then skip the greater than comparison.
> */
> - lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
> - bld->leveli_bld.zero);
> - need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, lod_fpart);
> + lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
> + bld->lodi_bld.zero);
> + need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
> lod_fpart);
> }
>
> lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> @@ -1465,7 +1465,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel1,
> &size1,
> &row_stride1_vec, &img_stride1_vec);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
> }
> else {
> @@ -1524,7 +1524,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> }
> else {
> unsigned num_chans_per_lod = 4 * bld->coord_type.length /
> bld->num_lods;
> - LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->leveli_bld.type.length);
> + LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->lodi_bld.type.length);
> LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
>
> /* Take the LSB of lod_fpart */
> @@ -1613,7 +1613,7 @@ lp_build_sample_aos(struct lp_build_sample_context
> *bld,
> * some max probably could hack up the weights in the linear
> * path with selects to work for nearest.
> */
> - if (bld->leveli_bld.type.length > 1)
> + if (bld->num_lods > 1)
> lod_positive = LLVMBuildExtractElement(builder, lod_positive,
> lp_build_const_int32(bld->gallivm,
> 0), "");
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 8ad3b9f..c686d82 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1087,7 +1087,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel0,
> &size0,
> &row_stride0_vec, &img_stride0_vec);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
> }
> else {
> @@ -1123,7 +1123,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> /* need_lerp = lod_fpart > 0 */
> if (bld->num_lods == 1) {
> need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
> - lod_fpart, bld->levelf_bld.zero,
> + lod_fpart, bld->lodf_bld.zero,
> "need_lerp");
> }
> else {
> @@ -1138,12 +1138,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> * negative values which would screw up filtering if not all
> * lod_fpart values have same sign.
> */
> - lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
> - bld->levelf_bld.zero);
> - need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
> + lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
> + bld->lodf_bld.zero);
> + need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
> PIPE_FUNC_GREATER,
> - lod_fpart, bld->levelf_bld.zero);
> - need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, need_lerp);
> + lod_fpart, bld->lodf_bld.zero);
> + need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
> need_lerp);
> }
>
> lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> @@ -1152,7 +1152,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel1,
> &size1,
> &row_stride1_vec, &img_stride1_vec);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
> }
> else {
> @@ -1178,7 +1178,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>
> if (bld->num_lods != bld->coord_type.length)
> lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
> -
> bld->levelf_bld.type,
> +
> bld->lodf_bld.type,
> bld->texel_bld.type,
> lod_fpart);
>
> @@ -1312,8 +1312,14 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
> mip_filter,
> &lod_ipart, lod_fpart, lod_pos_or_zero);
> } else {
> - lod_ipart = bld->leveli_bld.zero;
> - *lod_pos_or_zero = bld->leveli_bld.zero;
> + lod_ipart = bld->lodi_bld.zero;
> + *lod_pos_or_zero = bld->lodi_bld.zero;
> + }
> +
> + if (bld->num_lods != bld->num_mips) {
> + /* only makes sense if there's just a single mip level */
> + assert(bld->num_mips == 1);
> + lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
> }
>
> /*
> @@ -1641,7 +1647,7 @@ lp_build_sample_general(struct lp_build_sample_context
> *bld,
> * some max probably could hack up the weights in the linear
> * path with selects to work for nearest.
> */
> - if (bld->leveli_bld.type.length > 1)
> + if (bld->num_lods > 1)
> lod_positive = LLVMBuildExtractElement(builder, lod_positive,
> lp_build_const_int32(bld->gallivm,
> 0), "");
>
> @@ -1692,7 +1698,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
> const LLVMValueRef *offsets,
> LLVMValueRef *colors_out)
> {
> - struct lp_build_context *perquadi_bld = &bld->leveli_bld;
> + struct lp_build_context *perquadi_bld = &bld->lodi_bld;
> struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
> unsigned dims = bld->dims, chan;
> unsigned target = bld->static_texture_state->target;
> @@ -1706,7 +1712,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
> out_of_bounds = int_coord_bld->zero;
>
> if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
> - if (bld->num_lods != int_coord_bld->type.length) {
> + if (bld->num_mips != int_coord_bld->type.length) {
> ilevel = lp_build_pack_aos_scalars(bld->gallivm,
> int_coord_bld->type,
> perquadi_bld->type,
> explicit_lod, 0);
> }
> @@ -1717,7 +1723,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
> out_of_bound_ret_zero ? &out_of_bounds :
> NULL);
> }
> else {
> - assert(bld->num_lods == 1);
> + assert(bld->num_mips == 1);
> if (bld->static_texture_state->target != PIPE_BUFFER) {
> ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
> bld->gallivm,
> texture_unit);
> @@ -1856,7 +1862,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> unsigned target = static_texture_state->target;
> unsigned dims = texture_dims(target);
> unsigned num_quads = type.length / 4;
> - unsigned mip_filter, i;
> + unsigned mip_filter, min_img_filter, mag_img_filter, i;
> struct lp_build_sample_context bld;
> struct lp_static_sampler_state derived_sampler_state =
> *static_sampler_state;
> LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
> @@ -1919,6 +1925,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> debug_printf(" .min_mip_filter = %u\n",
> derived_sampler_state.min_mip_filter);
> }
>
> + min_img_filter = static_sampler_state->min_img_filter;
> + mag_img_filter = static_sampler_state->mag_img_filter;
> +
> +
> /*
> * This is all a bit complicated different paths are chosen for
> performance
> * reasons.
> @@ -1936,38 +1946,51 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> /*
> * There are other situations where at least the multiple int lods could
> be
> * avoided like min and max lod being equal.
> - * XXX if num_lods == 1 (for multiple quads) the level bld contexts will
> still
> - * have length 4. Because lod_selector is always using per quad calcs in
> this
> - * case, but minification etc. don't need to bother. This is very brittle
> though
> - * e.g. num_lods might be 1 but still have multiple positive_lod values!
> */
> + bld.num_mips = bld.num_lods = 1;
> if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> (explicit_lod || lod_bias ||
> - (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
> - ((is_fetch && target != PIPE_BUFFER) ||
> - (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> - bld.num_lods = type.length;
> + (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
> + if ((is_fetch && target != PIPE_BUFFER) ||
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + bld.num_mips = type.length;
> + bld.num_lods = type.length;
> + }
> + else if (!is_fetch && min_img_filter != mag_img_filter) {
> + bld.num_mips = 1;
> + bld.num_lods = type.length;
> + }
> + }
> /* TODO: for true scalar_lod should only use 1 lod value */
> - else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
> + else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
> (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + bld.num_mips = num_quads;
> bld.num_lods = num_quads;
> }
> - else {
> - bld.num_lods = 1;
> + else if (!is_fetch && min_img_filter != mag_img_filter) {
> + bld.num_mips = 1;
> + bld.num_lods = num_quads;
> }
>
> - bld.levelf_type = type;
> +
> + bld.lodf_type = type;
> /* we want native vector size to be able to use our intrinsics */
> if (bld.num_lods != type.length) {
> - bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) *
> 4 : 1;
> + /* TODO: this currently always has to be per-quad or per-element */
> + bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4
> : 1;
> + }
> + bld.lodi_type = lp_int_type(bld.lodf_type);
> + bld.levelf_type = bld.lodf_type;
> + if (bld.num_mips == 1) {
> + bld.levelf_type.length = 1;
> }
> bld.leveli_type = lp_int_type(bld.levelf_type);
> bld.float_size_type = bld.float_size_in_type;
> /* Note: size vectors may not be native. They contain minified w/h/d/_
> values,
> * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32
> */
> - if (bld.num_lods > 1) {
> - bld.float_size_type.length = bld.num_lods == type.length ?
> - bld.num_lods *
> bld.float_size_in_type.length :
> + if (bld.num_mips > 1) {
> + bld.float_size_type.length = bld.num_mips == type.length ?
> + bld.num_mips *
> bld.float_size_in_type.length :
> type.length;
> }
> bld.int_size_type = lp_int_type(bld.float_size_type);
> @@ -1984,6 +2007,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
> lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
> lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
> + lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
> + lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
>
> /* Get the dynamic state */
> tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
> @@ -2071,16 +2096,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> * (It should be faster if we'd support avx2)
> */
> if (num_quads == 1 || !use_aos) {
> -
> - if (num_quads > 1) {
> - if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
> - LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
> - /*
> - * This parameter is the same for all quads could probably
> simplify.
> - */
> - ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0,
> "");
> - }
> - }
> if (use_aos) {
> /* do sampling/filtering with fixed pt arithmetic */
> lp_build_sample_aos(&bld, sampler_index,
> @@ -2134,30 +2149,37 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
> bld4.texel_type = bld.texel_type;
> bld4.texel_type.length = 4;
> - bld4.levelf_type = type4;
> - /* we want native vector size to be able to use our intrinsics */
> - bld4.levelf_type.length = 1;
> - bld4.leveli_type = lp_int_type(bld4.levelf_type);
>
> + bld4.num_mips = bld4.num_lods = 1;
> if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> - (explicit_lod || lod_bias ||
> - (derivs && static_texture_state->target !=
> PIPE_TEXTURE_CUBE)) &&
> - ((is_fetch && target != PIPE_BUFFER) ||
> - (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> - bld4.num_lods = type4.length;
> - else
> - bld4.num_lods = 1;
> + (explicit_lod || lod_bias ||
> + (derivs && static_texture_state->target !=
> PIPE_TEXTURE_CUBE))) {
> + if ((is_fetch && target != PIPE_BUFFER) ||
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + bld4.num_mips = type4.length;
> + bld4.num_lods = type4.length;
> + }
> + else if (!is_fetch && min_img_filter != mag_img_filter) {
> + bld4.num_mips = 1;
> + bld4.num_lods = type4.length;
> + }
> + }
>
> - bld4.levelf_type = type4;
> /* we want native vector size to be able to use our intrinsics */
> + bld4.lodf_type = type4;
> if (bld4.num_lods != type4.length) {
> + bld4.lodf_type.length = 1;
> + }
> + bld4.lodi_type = lp_int_type(bld4.lodf_type);
> + bld4.levelf_type = type4;
> + if (bld4.num_mips != type4.length) {
> bld4.levelf_type.length = 1;
> }
> bld4.leveli_type = lp_int_type(bld4.levelf_type);
> bld4.float_size_type = bld4.float_size_in_type;
> - if (bld4.num_lods > 1) {
> - bld4.float_size_type.length = bld4.num_lods == type4.length ?
> - bld4.num_lods *
> bld4.float_size_in_type.length :
> + if (bld4.num_mips > 1) {
> + bld4.float_size_type.length = bld4.num_mips == type4.length ?
> + bld4.num_mips *
> bld4.float_size_in_type.length :
> type4.length;
> }
> bld4.int_size_type = lp_int_type(bld4.float_size_type);
> @@ -2174,6 +2196,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
> lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
> lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
> + lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
> + lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
>
> for (i = 0; i < num_quads; i++) {
> LLVMValueRef s4, t4, r4;
> @@ -2196,7 +2220,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> }
> }
> lod_positive4 = lp_build_extract_range(gallivm, lod_positive,
> num_lods * i, num_lods);
> - ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods *
> i, num_lods);
> + ilevel04 = bld.num_mips == 1 ? ilevel0 :
> + lp_build_extract_range(gallivm, ilevel0, num_lods
> * i, num_lods);
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods
> * i, num_lods);
> lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart,
> num_lods * i, num_lods);
> --
> 1.7.9.5
>
More information about the mesa-dev
mailing list