[Mesa-dev] [PATCH] gallivm: implement fully accurate corner filtering for seamless cube maps

sroland at vmware.com sroland at vmware.com
Tue Oct 22 04:29:52 CEST 2013


From: Roland Scheidegger <sroland at vmware.com>

d3d10 requires that cube corners are filtered with accurate weights (that
is, the weight of the non-existing corner texel should be evenly distributed
to the other 3 texels). OpenGL does not require this (but recommends it).
Accurate corner filtering requires us to use different filtering code, since we
need per-texel weights, which our 2d lerp doesn't (and can't) do. And of course
the (now per-element) weights need to be adjusted too for it to work.
Invoke the new filtering code whenever there's an edge, to keep things simpler:
it works for edges too, not just corners, though of course it's only needed
for corners.
More ugly code for not much gain but at least a hacked up cubemap demo
shows very nice corners now... Not sure yet if and how this should be
configurable...
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  138 +++++++++++++++++++--
 1 file changed, 130 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 8e2d0d9..5d3511d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -840,7 +840,11 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
 {
+   LLVMBuilderRef builder = bld->gallivm->builder;
+   struct lp_build_context *ivec_bld = &bld->int_coord_bld;
+   struct lp_build_context *coord_bld = &bld->coord_bld;
    const unsigned dims = bld->dims;
+   struct lp_build_if_state edge_if;
    LLVMValueRef width_vec;
    LLVMValueRef height_vec;
    LLVMValueRef depth_vec;
@@ -848,6 +852,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    LLVMValueRef flt_width_vec;
    LLVMValueRef flt_height_vec;
    LLVMValueRef flt_depth_vec;
+   LLVMValueRef fall_off[4], have_edge;
    LLVMValueRef z1 = NULL;
    LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
    LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
@@ -856,6 +861,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    LLVMValueRef xs[4], ys[4], zs[4];
    LLVMValueRef neighbors[2][2][4];
    int chan, texel_index;
+   boolean silly_but_accurate_cube_corner_filtering = TRUE;
 
    lp_build_extract_image_sizes(bld,
                                 &bld->int_size_bld,
@@ -918,12 +924,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
       }
    }
    else {
-      LLVMBuilderRef builder = bld->gallivm->builder;
-      struct lp_build_context *ivec_bld = &bld->int_coord_bld;
-      struct lp_build_context *coord_bld = &bld->coord_bld;
-      struct lp_build_if_state edge_if;
-      LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
-      LLVMValueRef fall_off[4], coord, have_edge;
+      LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2], coord;
       LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp;
       LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
       LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
@@ -1074,7 +1075,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
 
    if (linear_mask) {
       /*
-       * Whack filter weights into place. Whatever pixel had more weight is
+       * Whack filter weights into place. Whatever texel had more weight is
        * the one which should have been selected by nearest filtering hence
        * just use 100% weight for it.
        */
@@ -1135,7 +1136,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    }
    else {
       /* 2D/3D texture */
-      LLVMValueRef colors0[4];
+      LLVMValueRef colors0[4], colorss[4];
 
       /* get x0/x1 texels at y1 */
       lp_build_sample_texel_soa(bld,
@@ -1149,6 +1150,111 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                                 row_stride_vec, img_stride_vec,
                                 data_ptr, mipoffsets, neighbors[1][1]);
 
+      /*
+       * To avoid having to duplicate linear_mask / fetch code use
+       * another branch (with same edge condition) here (note that
+       * since we're using another branch anyway we COULD restrict this
+       * rather easily to just corners).
+       */
+      if (silly_but_accurate_cube_corner_filtering &&
+          bld->static_texture_state->target == PIPE_TEXTURE_CUBE &&
+          bld->static_sampler_state->seamless_cube_map) {
+         LLVMValueRef w00, w01, w10, w11, wx0, wy0;
+         LLVMValueRef c_weight, c00, c01, c10, c11;
+         LLVMValueRef one_third, tmp;
+
+         colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+         colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+         colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+         colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+
+         lp_build_if(&edge_if, bld->gallivm, have_edge);
+
+         /*
+          * we can't use standard 2d lerp as we need per-element weight
+          * in case of corners, so just calculate bilinear result as
+          * w00*s00 + w01*s01 + w10*s10 + w11*s11.
+          * (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
+          * however calculating the weights needs another 6, so actually probably
+          * not slower than 2d lerp only for 4 channels as weights only need
+          * to be calculated once - of course fixing the weights has additional cost.)
+          */
+         wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
+         wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
+         w00 = lp_build_mul(coord_bld, wx0, wy0);
+         w01 = lp_build_mul(coord_bld, s_fpart, wy0);
+         w10 = lp_build_mul(coord_bld, wx0, t_fpart);
+         w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
+
+         /* find corner weight */
+         c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
+         c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
+         c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
+         c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
+         c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
+         c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
+         c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
+         c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
+
+         /*
+          * add 1/3 of the corner weight to each of the 3 other samples
+          * and null out corner weight
+          */
+         one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.3333333333f);
+         c_weight = lp_build_mul(coord_bld, c_weight, one_third);
+         w00 = lp_build_add(coord_bld, w00, c_weight);
+         c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
+         w00 = lp_build_andnot(coord_bld, w00, c00);
+         w01 = lp_build_add(coord_bld, w01, c_weight);
+         c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
+         w01 = lp_build_andnot(coord_bld, w01, c01);
+         w10 = lp_build_add(coord_bld, w10, c_weight);
+         c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
+         w10 = lp_build_andnot(coord_bld, w10, c10);
+         w11 = lp_build_add(coord_bld, w11, c_weight);
+         c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
+         w11 = lp_build_andnot(coord_bld, w11, c11);
+
+         if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
+            for (chan = 0; chan < 4; chan++) {
+               colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
+               tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
+               colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+               tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
+               colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+               tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
+               colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+            }
+         }
+         else {
+            LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+            cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
+            cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
+            cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
+            cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
+            /* inputs to interpolation are just masks so just add masked weights together */
+            cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
+            cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
+            cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
+            cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
+            colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
+            tmp = lp_build_and(coord_bld, w01, cmpval01);
+            colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+            tmp = lp_build_and(coord_bld, w10, cmpval10);
+            colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+            tmp = lp_build_and(coord_bld, w11, cmpval11);
+            colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+            colors0[1] = colors0[2] = colors0[3] = colors0[0];
+         }
+
+         LLVMBuildStore(builder, colors0[0], colorss[0]);
+         LLVMBuildStore(builder, colors0[1], colorss[1]);
+         LLVMBuildStore(builder, colors0[2], colorss[2]);
+         LLVMBuildStore(builder, colors0[3], colorss[3]);
+
+         lp_build_else(&edge_if);
+      }
+
       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
          /* Bilinear interpolate the four samples from the 2D image / 3D slice */
          for (chan = 0; chan < 4; chan++) {
@@ -1172,6 +1278,22 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
          colors0[1] = colors0[2] = colors0[3] = colors0[0];
       }
 
+      if (silly_but_accurate_cube_corner_filtering &&
+          bld->static_texture_state->target == PIPE_TEXTURE_CUBE &&
+          bld->static_sampler_state->seamless_cube_map) {
+         LLVMBuildStore(builder, colors0[0], colorss[0]);
+         LLVMBuildStore(builder, colors0[1], colorss[1]);
+         LLVMBuildStore(builder, colors0[2], colorss[2]);
+         LLVMBuildStore(builder, colors0[3], colorss[3]);
+
+         lp_build_endif(&edge_if);
+
+         colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
+         colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
+         colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
+         colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
+      }
+
       if (dims == 3) {
          LLVMValueRef neighbors1[2][2][4];
          LLVMValueRef colors1[4];
-- 
1.7.9.5


More information about the mesa-dev mailing list