[Mesa-dev] [PATCH] gallivm: clean up passing derivatives around

sroland at vmware.com sroland at vmware.com
Fri Mar 8 16:56:31 PST 2013


From: Roland Scheidegger <sroland at vmware.com>

Previously, the derivatives were calculated and passed in a packed form
to the sample code (for implicit derivatives, explicit derivatives were
packed to the same format).
There are several reasons why this wasn't such a good idea:
1) the derivatives may not even be needed (not as bad as it sounds, since
llvm will just throw away the calculations needed for them, but still)
2) the special packing format really shouldn't be part of the sampler
interface
3) depending on what the sample code actually does, the derivatives will
be processed differently, hence there is no "ideal" packing. For cube
maps with explicit derivatives (which we don't do yet), for instance, the
packing looked downright useless, and for non-isotropic filtering we'd
need different calculations too.

So, instead just pass the derivatives as-is (for explicit derivatives),
or let the rho-calculating sample code calculate them itself. This still
does exactly the same packing stuff for implicit derivatives for now,
though explicit ones are handled in a more straightforward manner (quick
estimates show performance should be quite similar, though it is much
easier to follow and also does the rho calculation per-pixel until the
end, which we eventually need for spec compliance anyway).

No piglit changes.
---
 src/gallium/auxiliary/gallivm/lp_bld_quad.c       |   14 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  271 +++++++++++++--------
 src/gallium/auxiliary/gallivm/lp_bld_sample.h     |    6 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   11 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c   |   21 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c   |  122 +---------
 6 files changed, 196 insertions(+), 249 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index 8a0efed..1955add 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -79,14 +79,9 @@ lp_build_ddy(struct lp_build_context *bld,
 }
 
 /*
- * To be able to handle multiple quads at once in texture sampling and
- * do lod calculations per quad, it is necessary to get the per-quad
- * derivatives into the lp_build_rho function.
- * For 8-wide vectors the packed derivative values for 3 coords would
- * look like this, this scales to a arbitrary (multiple of 4) vector size:
- * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
+ * Helper for building packed ddx/ddy vector for one coord (scalar per quad
+ * values). The vector will look like this (8-wide):
  * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
- * The second vector will be unused for 1d and 2d textures.
  */
 LLVMValueRef
 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
@@ -121,6 +116,11 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
 }
 
 
+/*
+ * Helper for building packed ddx/ddy vector for two coords (scalar per quad
+ * values). The vector will look like this (8-wide):
+ * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
+ */
 LLVMValueRef
 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
                                  LLVMValueRef a, LLVMValueRef b)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index ef0631c..fc8bae7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -46,6 +46,7 @@
 #include "lp_bld_type.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_pack.h"
+#include "lp_bld_quad.h"
 
 
 /*
@@ -203,6 +204,9 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
 static LLVMValueRef
 lp_build_rho(struct lp_build_sample_context *bld,
              unsigned texture_unit,
+             LLVMValueRef s,
+             LLVMValueRef t,
+             LLVMValueRef r,
              const struct lp_derivatives *derivs)
 {
    struct gallivm_state *gallivm = bld->gallivm;
@@ -211,8 +215,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
    struct lp_build_context *float_bld = &bld->float_bld;
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
-   const LLVMValueRef *ddx_ddy = derivs->ddx_ddy;
    const unsigned dims = bld->dims;
+   LLVMValueRef ddx_ddy[2];
    LLVMBuilderRef builder = bld->gallivm->builder;
    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
@@ -229,59 +233,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
    LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
    LLVMValueRef rho_xvec, rho_yvec;
 
-   abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
-   if (dims > 2) {
-      abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
-   }
-   else {
-      abs_ddx_ddy[1] = NULL;
-   }
-
-   if (dims == 1) {
-      static const unsigned char swizzle1[] = {
-         0, LP_BLD_SWIZZLE_DONTCARE,
-         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-      };
-      static const unsigned char swizzle2[] = {
-         1, LP_BLD_SWIZZLE_DONTCARE,
-         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-      };
-      rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
-      rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
-   }
-   else if (dims == 2) {
-      static const unsigned char swizzle1[] = {
-         0, 2,
-         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-      };
-      static const unsigned char swizzle2[] = {
-         1, 3,
-         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-      };
-      rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
-      rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
-   }
-   else {
-      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
-      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
-      assert(dims == 3);
-      for (i = 0; i < num_quads; i++) {
-         shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
-         shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
-         shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
-         shuffles1[4*i + 3] = i32undef;
-         shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
-         shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
-         shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1);
-         shuffles2[4*i + 3] = i32undef;
-      }
-      rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
-                                        LLVMConstVector(shuffles1, length), "");
-      rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
-                                        LLVMConstVector(shuffles2, length), "");
-   }
-
-   rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
+   /* Note that all simplified calculations will only work for isotropic filtering */
 
    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                  bld->gallivm, texture_unit);
@@ -289,76 +241,176 @@ lp_build_rho(struct lp_build_sample_context *bld,
    int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
    float_size = lp_build_int_to_float(float_size_bld, int_size);
 
-   if (bld->coord_type.length > 4) {
-      /* expand size to each quad */
+   /* XXX ignoring explicit derivs for cube maps for now */
+   if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
+      LLVMValueRef ddmax[3];
+      for (i = 0; i < dims; i++) {
+         LLVMValueRef ddx, ddy;
+         LLVMValueRef floatdim;
+         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+         ddx = lp_build_abs(coord_bld, derivs->ddx[i]);
+         ddy = lp_build_abs(coord_bld, derivs->ddy[i]);
+         ddmax[i] = lp_build_max(coord_bld, ddx, ddy);
+         floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
+                                               coord_bld->type, float_size, indexi);
+         ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
+      }
+      rho_vec = ddmax[0];
       if (dims > 1) {
-         /* could use some broadcast_vector helper for this? */
-         int num_quads = bld->coord_type.length / 4;
-         LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
-         for (i = 0; i < num_quads; i++) {
-            src[i] = float_size;
+         rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[1]);
+         if (dims > 2) {
+            rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[2]);
+         }
+      }
+      /*
+       * rho_vec now still contains per-pixel rho, convert to scalar per quad
+       * since we can't handle per-pixel rho/lod from now on (TODO).
+       */
+      rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
+                                      perquadf_bld->type, rho_vec, 0);
+   }
+   else {
+      /*
+       * This all looks a bit complex, but it's not that bad
+       * (the shuffle code makes it look worse than it is).
+       * Still, might not be ideal for all cases.
+       */
+      if (dims < 2) {
+         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
+      }
+      else if (dims >= 2) {
+         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld,
+                                                       s, t);
+         if (dims > 2) {
+            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
          }
-         float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
+      }
+
+      abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
+      if (dims > 2) {
+         abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
       }
       else {
-         float_size = lp_build_broadcast_scalar(coord_bld, float_size);
+         abs_ddx_ddy[1] = NULL;
       }
-      rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
 
-      if (dims <= 1) {
-         rho = rho_vec;
+      if (dims == 1) {
+         static const unsigned char swizzle1[] = {
+            0, LP_BLD_SWIZZLE_DONTCARE,
+            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+         };
+         static const unsigned char swizzle2[] = {
+            1, LP_BLD_SWIZZLE_DONTCARE,
+            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+         };
+         rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
+         rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
+      }
+      else if (dims == 2) {
+         static const unsigned char swizzle1[] = {
+            0, 2,
+            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+         };
+         static const unsigned char swizzle2[] = {
+            1, 3,
+            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+         };
+         rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
+         rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
       }
       else {
-         if (dims >= 2) {
-            static const unsigned char swizzle1[] = {
-               0, LP_BLD_SWIZZLE_DONTCARE,
-               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-            };
-            static const unsigned char swizzle2[] = {
-               1, LP_BLD_SWIZZLE_DONTCARE,
-               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-            };
-            LLVMValueRef rho_s, rho_t, rho_r;
-
-            rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
-            rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
-
-            rho = lp_build_max(coord_bld, rho_s, rho_t);
-
-            if (dims >= 3) {
-               static const unsigned char swizzle3[] = {
-                  2, LP_BLD_SWIZZLE_DONTCARE,
+         LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
+         LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
+         assert(dims == 3);
+         for (i = 0; i < num_quads; i++) {
+            shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
+            shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
+            shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
+            shuffles1[4*i + 3] = i32undef;
+            shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
+            shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
+            shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1);
+            shuffles2[4*i + 3] = i32undef;
+         }
+         rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+                                           LLVMConstVector(shuffles1, length), "");
+         rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+                                           LLVMConstVector(shuffles2, length), "");
+      }
+
+      rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
+
+      if (bld->coord_type.length > 4) {
+         /* expand size to each quad */
+         if (dims > 1) {
+            /* could use some broadcast_vector helper for this? */
+            int num_quads = bld->coord_type.length / 4;
+            LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
+            for (i = 0; i < num_quads; i++) {
+               src[i] = float_size;
+            }
+            float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
+         }
+         else {
+            float_size = lp_build_broadcast_scalar(coord_bld, float_size);
+         }
+         rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
+
+         if (dims <= 1) {
+            rho = rho_vec;
+         }
+         else {
+            if (dims >= 2) {
+               static const unsigned char swizzle1[] = {
+                  0, LP_BLD_SWIZZLE_DONTCARE,
                   LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
                };
-               rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3);
-               rho = lp_build_max(coord_bld, rho, rho_r);
+               static const unsigned char swizzle2[] = {
+                  1, LP_BLD_SWIZZLE_DONTCARE,
+                  LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+               };
+               LLVMValueRef rho_s, rho_t, rho_r;
+
+               rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
+               rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
+
+               rho = lp_build_max(coord_bld, rho_s, rho_t);
+
+               if (dims >= 3) {
+                  static const unsigned char swizzle3[] = {
+                     2, LP_BLD_SWIZZLE_DONTCARE,
+                     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+                  };
+                  rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3);
+                  rho = lp_build_max(coord_bld, rho, rho_r);
+               }
             }
          }
-      }
-      rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                      perquadf_bld->type, rho, 0);
-   }
-   else {
-      if (dims <= 1) {
-         rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
-      }
-      rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
-
-      if (dims <= 1) {
-         rho = rho_vec;
+         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
+                                         perquadf_bld->type, rho, 0);
       }
       else {
-         if (dims >= 2) {
-            LLVMValueRef rho_s, rho_t, rho_r;
+         if (dims <= 1) {
+            rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
+         }
+         rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
+
+         if (dims <= 1) {
+            rho = rho_vec;
+         }
+         else {
+            if (dims >= 2) {
+               LLVMValueRef rho_s, rho_t, rho_r;
 
-            rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
-            rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
+               rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
+               rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
 
-            rho = lp_build_max(float_bld, rho_s, rho_t);
+               rho = lp_build_max(float_bld, rho_s, rho_t);
 
-            if (dims >= 3) {
-               rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
-               rho = lp_build_max(float_bld, rho, rho_r);
+               if (dims >= 3) {
+                  rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
+                  rho = lp_build_max(float_bld, rho, rho_r);
+               }
             }
          }
       }
@@ -511,6 +563,9 @@ void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                       unsigned texture_unit,
                       unsigned sampler_unit,
+                      LLVMValueRef s,
+                      LLVMValueRef t,
+                      LLVMValueRef r,
                       const struct lp_derivatives *derivs,
                       LLVMValueRef lod_bias, /* optional */
                       LLVMValueRef explicit_lod, /* optional */
@@ -544,7 +599,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
       else {
          LLVMValueRef rho;
 
-         rho = lp_build_rho(bld, texture_unit, derivs);
+         rho = lp_build_rho(bld, texture_unit, s, t, r, derivs);
 
          /*
           * Compute lod = log2(rho)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 6306455..1abe0ca 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -56,7 +56,8 @@ struct lp_build_context;
  */
 struct lp_derivatives
 {
-   LLVMValueRef ddx_ddy[2];
+   LLVMValueRef ddx[3];
+   LLVMValueRef ddy[3];
 };
 
 
@@ -366,6 +367,9 @@ void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                       unsigned texture_index,
                       unsigned sampler_index,
+                      LLVMValueRef s,
+                      LLVMValueRef t,
+                      LLVMValueRef r,
                       const struct lp_derivatives *derivs,
                       LLVMValueRef lod_bias, /* optional */
                       LLVMValueRef explicit_lod, /* optional */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 8aa4166..cdd910f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1077,7 +1077,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                        LLVMValueRef *s,
                        LLVMValueRef *t,
                        LLVMValueRef *r,
-                       const struct lp_derivatives *derivs,
+                       const struct lp_derivatives *derivs, /* optional */
                        LLVMValueRef lod_bias, /* optional */
                        LLVMValueRef explicit_lod, /* optional */
                        LLVMValueRef *lod_ipart,
@@ -1090,7 +1090,6 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
    const unsigned target = bld->static_texture_state->target;
    LLVMValueRef first_level;
-   struct lp_derivatives face_derivs;
 
    /*
    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
@@ -1107,11 +1106,6 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
       *t = face_t; /* vec */
       /* use 'r' to indicate cube face */
       *r = face; /* vec */
-
-      /* recompute ddx, ddy using the new (s,t) face texcoords */
-      face_derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, *s, *t);
-      face_derivs.ddx_ddy[1] = NULL;
-      derivs = &face_derivs;
    }
    else if (target == PIPE_TEXTURE_1D_ARRAY) {
       *r = lp_build_iround(&bld->coord_bld, *t);
@@ -1131,6 +1125,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
        * distinguish between minification/magnification with one mipmap level.
        */
       lp_build_lod_selector(bld, texture_index, sampler_index,
+                            *s, *t, *r,
                             derivs, lod_bias, explicit_lod,
                             mip_filter,
                             lod_ipart, lod_fpart);
@@ -1479,7 +1474,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                     unsigned sampler_index,
                     const LLVMValueRef *coords,
                     const LLVMValueRef *offsets,
-                    const struct lp_derivatives *derivs,
+                    const struct lp_derivatives *derivs, /* optional */
                     LLVMValueRef lod_bias, /* optional */
                     LLVMValueRef explicit_lod, /* optional */
                     LLVMValueRef texel_out[4])
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 9a30cc8..98bce0e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -363,7 +363,7 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
    unsigned target;
    unsigned unit;
    LLVMValueRef coords;
-   struct lp_derivatives derivs;
+   struct lp_derivatives derivs = { {NULL}, {NULL} };
 
    if (!bld->sampler) {
       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
@@ -374,22 +374,15 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
 
    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
 
-   if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
-      lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
-      lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
+   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+      /* probably not going to work */
+      derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
+      derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
       unit = inst->Src[3].Register.Index;
-   }  else {
-#if 0
-      ddx = lp_build_ddx( &bld->bld_base.base, coords );
-      ddy = lp_build_ddy( &bld->bld_base.base, coords );
-#else
-      /* TODO */
-      derivs.ddx_ddy[0] = bld->bld_base.base.one;
-      derivs.ddx_ddy[1] = bld->bld_base.base.one;
-#endif
+   }
+   else {
       unit = inst->Src[1].Register.Index;
    }
-
    return bld->sampler->emit_fetch_texel(bld->sampler,
                                          &bld->bld_base.base,
                                          target, unit,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 69957fe..9fe87c4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1164,14 +1164,13 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
           enum lp_build_tex_modifier modifier,
           LLVMValueRef *texel)
 {
-   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
-   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    unsigned unit;
    LLVMValueRef lod_bias, explicit_lod;
    LLVMValueRef oow = NULL;
    LLVMValueRef coords[4];
    LLVMValueRef offsets[3] = { NULL };
    struct lp_derivatives derivs;
+   struct lp_derivatives *deriv_ptr = NULL;
    unsigned num_coords;
    unsigned dims;
    unsigned i;
@@ -1184,9 +1183,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
       return;
    }
 
-   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
-   derivs.ddx_ddy[1] = bld->bld_base.base.undef;
-
    switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
       num_coords = 1;
@@ -1259,58 +1255,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    }
 
    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
-      LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-      LLVMValueRef ddxdyonec[3];
-      unsigned length = bld->bld_base.base.type.length;
-      unsigned num_quads = length / 4;
       unsigned dim;
-      unsigned quad;
-
       for (dim = 0; dim < dims; ++dim) {
-         LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
-         LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
-         for (quad = 0; quad < num_quads; ++quad) {
-            unsigned s1 = 4*quad;
-            unsigned s2 = 4*quad + length;
-            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
-            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
-            shuffles[4*quad + 2] = i32undef;
-            shuffles[4*quad + 3] = i32undef;
-         }
-         ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
-                                               LLVMConstVector(shuffles, length), "");
-      }
-      if (dims == 1) {
-         derivs.ddx_ddy[0] = ddxdyonec[0];
-      }
-      else if (dims >= 2) {
-         for (quad = 0; quad < num_quads; ++quad) {
-            unsigned s1 = 4*quad;
-            unsigned s2 = 4*quad + length;
-            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
-            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
-            shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
-            shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
-         }
-         derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
-                                                  LLVMConstVector(shuffles, length), "");
-         if (dims == 3) {
-            derivs.ddx_ddy[1] = ddxdyonec[2];
-         }
+         derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
+         derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
       }
+      deriv_ptr = &derivs;
       unit = inst->Src[3].Register.Index;
    }  else {
-      if (dims == 1) {
-         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
-      }
-      else if (dims >= 2) {
-         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
-                                                            coords[0], coords[1]);
-         if (dims == 3) {
-            derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
-         }
-      }
       unit = inst->Src[1].Register.Index;
    }
 
@@ -1329,7 +1281,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
                                   unit, unit,
                                   coords,
                                   offsets,
-                                  &derivs,
+                                  deriv_ptr,
                                   lod_bias, explicit_lod,
                                   texel);
 }
@@ -1341,13 +1293,13 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
             boolean compare,
             LLVMValueRef *texel)
 {
-   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    unsigned texture_unit, sampler_unit;
    LLVMValueRef lod_bias, explicit_lod;
    LLVMValueRef coords[4];
    LLVMValueRef offsets[3] = { NULL };
    struct lp_derivatives derivs;
+   struct lp_derivatives *deriv_ptr = NULL;
    unsigned num_coords, dims;
    unsigned i;
 
@@ -1366,9 +1318,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
    texture_unit = inst->Src[1].Register.Index;
    sampler_unit = inst->Src[2].Register.Index;
 
-   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
-   derivs.ddx_ddy[1] = bld->bld_base.base.undef;
-
    /*
     * Note inst->Texture.Texture will contain the number of offsets,
     * however the target information is NOT there and comes from the
@@ -1449,57 +1398,12 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
    }
 
    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
-      LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-      LLVMValueRef ddxdyonec[3];
-      unsigned length = bld->bld_base.base.type.length;
-      unsigned num_quads = length / 4;
       unsigned dim;
-      unsigned quad;
-
       for (dim = 0; dim < dims; ++dim) {
-         LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
-         LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
-         for (quad = 0; quad < num_quads; ++quad) {
-            unsigned s1 = 4*quad;
-            unsigned s2 = 4*quad + length;
-            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
-            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
-            shuffles[4*quad + 2] = i32undef;
-            shuffles[4*quad + 3] = i32undef;
-         }
-         ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
-                                               LLVMConstVector(shuffles, length), "");
-      }
-      if (dims == 1) {
-         derivs.ddx_ddy[0] = ddxdyonec[0];
-      }
-      else if (dims >= 2) {
-         for (quad = 0; quad < num_quads; ++quad) {
-            unsigned s1 = 4*quad;
-            unsigned s2 = 4*quad + length;
-            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
-            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
-            shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
-            shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
-         }
-         derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
-                                                  LLVMConstVector(shuffles, length), "");
-         if (dims == 3) {
-            derivs.ddx_ddy[1] = ddxdyonec[2];
-         }
-      }
-   }  else {
-      if (dims == 1) {
-         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
-      }
-      else if (dims >= 2) {
-         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
-                                                            coords[0], coords[1]);
-         if (dims == 3) {
-            derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
-         }
+         derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
+         derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
       }
+      deriv_ptr = &derivs;
    }
 
    /* some advanced gather instructions (txgo) would require 4 offsets */
@@ -1517,7 +1421,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
                                   texture_unit, sampler_unit,
                                   coords,
                                   offsets,
-                                  &derivs,
+                                  deriv_ptr,
                                   lod_bias, explicit_lod,
                                   texel);
 }
@@ -1533,7 +1437,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
    LLVMValueRef explicit_lod = NULL;
    LLVMValueRef coords[3];
    LLVMValueRef offsets[3] = { NULL };
-   struct lp_derivatives derivs;
    unsigned num_coords;
    unsigned dims;
    unsigned i;
@@ -1548,9 +1451,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
 
    unit = inst->Src[1].Register.Index;
 
-   derivs.ddx_ddy[0] = coord_undef;
-   derivs.ddx_ddy[1] = coord_undef;
-
    if (is_samplei) {
       target = bld->sv[unit].Resource;
    }
@@ -1612,7 +1512,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                                   unit, unit,
                                   coords,
                                   offsets,
-                                  &derivs,
+                                  NULL,
                                   NULL, explicit_lod,
                                   texel);
 }
-- 
1.7.9.5



More information about the mesa-dev mailing list