[Mesa-dev] [PATCH 1/3] gallivm: ignore rho approximation for cube maps

Thu Oct 3 12:40:40 PDT 2013

On 10/03/2013 09:42 AM, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> There are two reasons for this:
> 1) even when ignoring rho approximation for cube maps, the result is still
> not correct, but it's better as the max error at edges is now sqrt(2) instead
> of 2 (which was a full mip level), same as it is for ordinary 2d maps when
> doing rho approximations (so the error actually goes from factor 2 at edges and
> sqrt(2) completely inside a face to sqrt(2) at edges and 0 inside a face).
> 2) I want to repurpose rho_no_approx for cubemaps for fully correct cubemap
> derivatives (so we don't need yet another debug var).
> ---
>   src/gallium/auxiliary/gallivm/lp_bld_sample.c |   34 +++++++++----------------
>   1 file changed, 12 insertions(+), 22 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index c775382..ea6bec7 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -269,10 +269,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
>         /* Could optimize this for single quad just skip the broadcast */
>         cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
>                                               rho_bld->type, float_size, index0);
> -      if (no_rho_opt) {
> -         /* skipping sqrt hence returning rho squared */
> -         cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
> -      }
> +      /* skipping sqrt hence returning rho squared */
> +      cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
>         rho = lp_build_mul(rho_bld, cubesize, rho);
>      }
>      else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
> @@ -757,8 +755,8 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
>         }
>         else {
>            LLVMValueRef rho;
> -         boolean rho_squared = (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
> -                               (bld->dims > 1);
> +         boolean rho_squared = ((gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
> +                                (bld->dims > 1)) || cube_rho;
>
>            rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);
>
> @@ -1602,31 +1600,23 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
>             * know the texture is square which simplifies things (we can omit the
>             * size mul which happens very early completely here and do it at the
>             * very end).
> +          * Also always do calculations according to GALLIVM_DEBUG_NO_RHO_APPROX
> +          * since the error can get quite big otherwise at edges.
> +          * (With no_rho_approx max error is sqrt(2) at edges, same as it is
> +          * without no_rho_approx for 2d textures, otherwise it would be factor 2.)
>             */
>            ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
>            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
>
> -         if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
> -            ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
> -            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
> -         }
> -         else {
> -            ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
> -            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
> -         }
> +         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
> +         ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
>
>            tmp[0] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
>            tmp[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
>            tmp[2] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
>
> -         if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
> -            rho_vec = lp_build_add(coord_bld, tmp[0], tmp[1]);
> -            rho_vec = lp_build_add(coord_bld, rho_vec, tmp[2]);
> -         }
> -         else {
> -            rho_vec = lp_build_max(coord_bld, tmp[0], tmp[1]);
> -            rho_vec = lp_build_max(coord_bld, rho_vec, tmp[2]);
> -         }
> +         rho_vec = lp_build_add(coord_bld, tmp[0], tmp[1]);
> +         rho_vec = lp_build_add(coord_bld, rho_vec, tmp[2]);

I don't know how often we have these 3-way lp_build_add() sequences, but 
would an lp_build_add3(bld, a, b, c) be useful?

>
>            tmp[0] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
>            tmp[1] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
>