Mesa (gallium-llvmpipe): llvmpipe: Factor out and optimize the input interpolation.

Jose Fonseca jrfonseca at kemper.freedesktop.org
Sun Aug 23 11:29:27 UTC 2009


Module: Mesa
Branch: gallium-llvmpipe
Commit: 8f8f9ed4e8ab62cf0f18570d6b6349663655a988
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8f8f9ed4e8ab62cf0f18570d6b6349663655a988

Author: José Fonseca <jfonseca at vmware.com>
Date:   Sun Aug 23 12:28:34 2009 +0100

llvmpipe: Factor out and optimize the input interpolation.

Special attention is given to the interpolation of side by side quads.
Multiplications are made only for the first quad. Interpolation of
inputs for subsequent quads is done exclusively with additions, plus a
perspective divide if necessary.

---

 src/gallium/drivers/llvmpipe/Makefile          |    1 +
 src/gallium/drivers/llvmpipe/SConscript        |    1 +
 src/gallium/drivers/llvmpipe/lp_bld_interp.c   |  377 ++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_interp.h   |   99 ++++++
 src/gallium/drivers/llvmpipe/lp_bld_tgsi.h     |    6 +-
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c |  106 +------
 src/gallium/drivers/llvmpipe/lp_state_fs.c     |  100 ++-----
 7 files changed, 521 insertions(+), 169 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ca0a8bf..4f31788 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -14,6 +14,7 @@ C_SOURCES = \
 	lp_bld_debug.c \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
+	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_pack.c \
 	lp_bld_unpack.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 10ead09..ec82be1 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -18,6 +18,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_debug.c',
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
+		'lp_bld_interp.c',
 		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
new file mode 100644
index 0000000..cfe20a0
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -0,0 +1,377 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * @author Jose Fonseca <jfonseca at vmware.com>
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_parse.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_interp.h"
+
+
+static void
+attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
+{
+   if(attrib == 0)
+      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
+   else
+      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
+}
+
+
+static void
+coeffs_init(struct lp_build_interp_soa_context *bld,
+            LLVMValueRef a0_ptr,
+            LLVMValueRef dadx_ptr,
+            LLVMValueRef dady_ptr)
+{
+   LLVMBuilderRef builder = bld->base.builder;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
+            LLVMValueRef a0 = NULL;
+            LLVMValueRef dadx = NULL;
+            LLVMValueRef dady = NULL;
+
+            switch( mode ) {
+            case TGSI_INTERPOLATE_PERSPECTIVE:
+               /* fall-through */
+
+            case TGSI_INTERPOLATE_LINEAR:
+               dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
+               dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
+               dadx = lp_build_broadcast_scalar(&bld->base, dadx);
+               dady = lp_build_broadcast_scalar(&bld->base, dady);
+               attrib_name(dadx, attrib, chan, ".dadx");
+               attrib_name(dady, attrib, chan, ".dady");
+               /* fall-through */
+
+            case TGSI_INTERPOLATE_CONSTANT:
+               a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
+               a0 = lp_build_broadcast_scalar(&bld->base, a0);
+               attrib_name(a0, attrib, chan, ".a0");
+               break;
+
+            default:
+               assert(0);
+               break;
+            }
+
+            bld->a0  [attrib][chan] = a0;
+            bld->dadx[attrib][chan] = dadx;
+            bld->dady[attrib][chan] = dady;
+         }
+      }
+   }
+}
+
+
+/**
+ * Small vector x scalar multiplication optimization.
+ *
+ * TODO: Should be elsewhere.
+ */
+static LLVMValueRef
+coeff_multiply(struct lp_build_interp_soa_context *bld,
+               LLVMValueRef coeff,
+               int step)
+{
+   LLVMValueRef factor;
+
+   switch(step) {
+   case 0:
+      return bld->base.zero;
+   case 1:
+      return coeff;
+   case 2:
+      return lp_build_add(&bld->base, coeff, coeff);
+   default:
+      factor = lp_build_const_scalar(bld->base.type, (double)step);
+      return lp_build_mul(&bld->base, coeff, factor);
+   }
+}
+
+
+/**
+ * Multiply the dadx and dady with the xstep and ystep respectively.
+ */
+static void
+coeffs_update(struct lp_build_interp_soa_context *bld)
+{
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      if (mode != TGSI_INTERPOLATE_CONSTANT) {
+         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+            if(mask & (1 << chan)) {
+               bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
+               bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+            }
+         }
+      }
+   }
+}
+
+
+static void
+attribs_init(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef x = bld->pos[0];
+   LLVMValueRef y = bld->pos[1];
+   LLVMValueRef oow = NULL;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef a0   = bld->a0  [attrib][chan];
+            LLVMValueRef dadx = bld->dadx[attrib][chan];
+            LLVMValueRef dady = bld->dady[attrib][chan];
+            LLVMValueRef res;
+
+            res = a0;
+
+            if (mode != TGSI_INTERPOLATE_CONSTANT) {
+               res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx));
+               res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady));
+            }
+
+            /* Keep the value of the attribute before perspective divide for faster updates */
+            bld->attribs_pre[attrib][chan] = res;
+
+            if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
+               LLVMValueRef w = bld->pos[3];
+               assert(attrib != 0);
+               if(!oow)
+                  oow = lp_build_rcp(&bld->base, w);
+               res = lp_build_mul(&bld->base, res, oow);
+            }
+
+            attrib_name(res, attrib, chan, "");
+
+            bld->attribs[attrib][chan] = res;
+         }
+      }
+   }
+}
+
+
+static void
+attribs_update(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef oow = NULL;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+
+      if (mode != TGSI_INTERPOLATE_CONSTANT) {
+         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+            if(mask & (1 << chan)) {
+               LLVMValueRef dadx = bld->dadx[attrib][chan];
+               LLVMValueRef dady = bld->dady[attrib][chan];
+               LLVMValueRef res;
+
+               res = bld->attribs_pre[attrib][chan];
+
+               if(bld->xstep)
+                  res = lp_build_add(&bld->base, res, dadx);
+
+               if(bld->ystep)
+                  res = lp_build_add(&bld->base, res, dady);
+
+               bld->attribs_pre[attrib][chan] = res;
+
+               if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
+                  LLVMValueRef w = bld->pos[3];
+                  assert(attrib != 0);
+                  if(!oow)
+                     oow = lp_build_rcp(&bld->base, w);
+                  res = lp_build_mul(&bld->base, res, oow);
+               }
+
+               attrib_name(res, attrib, chan, "");
+
+               bld->attribs[attrib][chan] = res;
+            }
+         }
+      }
+   }
+}
+
+
+/**
+ * Generate the position vectors.
+ *
+ * Parameters x0, y0 are the floating-point vectors with the coordinates of the first quad's pixels.
+ */
+static void
+pos_init(struct lp_build_interp_soa_context *bld,
+         LLVMValueRef x0,
+         LLVMValueRef y0)
+{
+   lp_build_name(x0, "pos.x");
+   lp_build_name(y0, "pos.y");
+
+   bld->attribs[0][0] = x0;
+   bld->attribs[0][1] = y0;
+}
+
+
+static void
+pos_update(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef x = bld->attribs[0][0];
+   LLVMValueRef y = bld->attribs[0][1];
+
+   if(bld->xstep)
+      x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep));
+
+   if(bld->ystep)
+      y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep));
+
+   lp_build_name(x, "pos.x");
+   lp_build_name(y, "pos.y");
+
+   bld->attribs[0][0] = x;
+   bld->attribs[0][1] = y;
+}
+
+
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         const struct tgsi_token *tokens,
+                         LLVMBuilderRef builder,
+                         union lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0,
+                         int xstep,
+                         int ystep)
+{
+   struct tgsi_parse_context parse;
+   struct tgsi_full_declaration *decl;
+
+   memset(bld, 0, sizeof *bld);
+
+   lp_build_context_init(&bld->base, builder, type);
+
+   /* For convenience */
+   bld->pos = bld->attribs[0];
+   bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
+
+   /* Position */
+   bld->num_attribs = 1;
+   bld->mask[0] = TGSI_WRITEMASK_ZW;
+   bld->mode[0] = TGSI_INTERPOLATE_LINEAR;
+
+   /* Inputs */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         decl = &parse.FullToken.FullDeclaration;
+         if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+            unsigned first, last, mask;
+            unsigned attrib;
+
+            first = decl->DeclarationRange.First;
+            last = decl->DeclarationRange.Last;
+            mask = decl->Declaration.UsageMask;
+
+            for( attrib = first; attrib <= last; ++attrib ) {
+               bld->mask[1 + attrib] = mask;
+               bld->mode[1 + attrib] = decl->Declaration.Interpolate;
+            }
+
+            bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+   tgsi_parse_free( &parse );
+
+   coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+
+   pos_init(bld, x0, y0);
+
+   attribs_init(bld);
+
+   bld->xstep = xstep;
+   bld->ystep = ystep;
+
+   coeffs_update(bld);
+}
+
+
+/**
+ * Advance the position and inputs with the xstep and ystep.
+ */
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld)
+{
+   pos_update(bld);
+
+   attribs_update(bld);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
new file mode 100644
index 0000000..9194f62
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -0,0 +1,99 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * Special attention is given to the interpolation of side by side quads.
+ * Multiplications are made only for the first quad. Interpolation of
+ * inputs for subsequent quads is done exclusively with additions, plus a
+ * perspective divide if necessary.
+ *
+ * @author Jose Fonseca <jfonseca at vmware.com>
+ */
+
+#ifndef LP_BLD_INTERP_H
+#define LP_BLD_INTERP_H
+
+
+#include <llvm-c/Core.h>
+
+#include "tgsi/tgsi_exec.h"
+
+#include "lp_bld_type.h"
+
+
+struct tgsi_token;
+
+
+struct lp_build_interp_soa_context
+{
+   struct lp_build_context base;
+
+   unsigned num_attribs;
+   unsigned mask[1 + PIPE_MAX_SHADER_INPUTS];
+   unsigned mode[1 + PIPE_MAX_SHADER_INPUTS];
+
+   LLVMValueRef a0  [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   int xstep;
+   int ystep;
+
+   /* Attribute values before perspective divide */
+   LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   /*
+    * Convenience pointers. Callers may access these.
+    */
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[NUM_CHANNELS];
+};
+
+
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         const struct tgsi_token *tokens,
+                         LLVMBuilderRef builder,
+                         union lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0,
+                         int xstep,
+                         int ystep);
+
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld);
+
+
+#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 86380a1..d42ab99 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -54,11 +54,9 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
                   union lp_type type,
                   struct lp_build_mask_context *mask,
-                  LLVMValueRef *pos,
-                  LLVMValueRef a0_ptr,
-                  LLVMValueRef dadx_ptr,
-                  LLVMValueRef dady_ptr,
                   LLVMValueRef consts_ptr,
+                  const LLVMValueRef *pos,
+                  const LLVMValueRef (*inputs)[4],
                   LLVMValueRef (*outputs)[4],
                   LLVMValueRef samplers_ptr);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 60cf5e9..1335ba8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -83,19 +83,12 @@ struct lp_build_tgsi_soa_context
 {
    struct lp_build_context base;
 
-   LLVMValueRef x, y, w;
-   LLVMValueRef a0_ptr;
-   LLVMValueRef dadx_ptr;
-   LLVMValueRef dady_ptr;
-
    LLVMValueRef consts_ptr;
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[NUM_CHANNELS];
    LLVMValueRef (*outputs)[NUM_CHANNELS];
    LLVMValueRef samplers_ptr;
 
-   LLVMValueRef oow;
-
-   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-
    LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
    LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
 
@@ -1350,93 +1343,16 @@ emit_instruction(
    return 1;
 }
 
-static void
-emit_declaration(
-   struct lp_build_tgsi_soa_context *bld,
-   struct tgsi_full_declaration *decl )
-{
-   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
-      LLVMBuilderRef builder = bld->base.builder;
-      unsigned first, last, mask;
-      unsigned attrib, chan;
-
-      first = decl->DeclarationRange.First;
-      last = decl->DeclarationRange.Last;
-      mask = decl->Declaration.UsageMask;
-
-      for( attrib = first; attrib <= last; attrib++ ) {
-         for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
-            LLVMValueRef input = bld->base.undef;
-
-            if( mask & (1 << chan) ) {
-               LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), (1 + attrib)*NUM_CHANNELS + chan, 0);
-               LLVMValueRef a0;
-               LLVMValueRef dadx;
-               LLVMValueRef dady;
-
-               switch( decl->Declaration.Interpolate ) {
-               case TGSI_INTERPOLATE_PERSPECTIVE:
-                  /* fall-through */
-
-               case TGSI_INTERPOLATE_LINEAR: {
-                  LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
-                  LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
-                  dadx = LLVMBuildLoad(builder, dadx_ptr, "");
-                  dady = LLVMBuildLoad(builder, dady_ptr, "");
-                  dadx = lp_build_broadcast_scalar(&bld->base, dadx);
-                  dady = lp_build_broadcast_scalar(&bld->base, dady);
-                  lp_build_name(dadx, "dadx_%u.%c", attrib, "xyzw"[chan]);
-                  lp_build_name(dady, "dady_%u.%c", attrib, "xyzw"[chan]);
-                  /* fall-through */
-               }
-
-               case TGSI_INTERPOLATE_CONSTANT: {
-                  LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
-                  a0 = LLVMBuildLoad(builder, a0_ptr, "");
-                  a0 = lp_build_broadcast_scalar(&bld->base, a0);
-                  lp_build_name(a0, "a0_%u.%c", attrib, "xyzw"[chan]);
-                  break;
-               }
-
-               default:
-                  assert(0);
-                  break;
-               }
-
-               input = a0;
-
-               if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
-                  input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
-                  input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
-               }
-
-               if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
-                  if(!bld->oow)
-                     bld->oow = lp_build_rcp(&bld->base, bld->w);
-                  input = lp_build_mul(&bld->base, input, bld->oow);
-               }
-
-               lp_build_name(input, "input%u.%c", attrib, "xyzw"[chan]);
-            }
-
-            bld->inputs[attrib][chan] = input;
-         }
-      }
-   }
-}
-
 
 void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
                   union lp_type type,
                   struct lp_build_mask_context *mask,
-                  LLVMValueRef *pos,
-                  LLVMValueRef a0_ptr,
-                  LLVMValueRef dadx_ptr,
-                  LLVMValueRef dady_ptr,
                   LLVMValueRef consts_ptr,
-                  LLVMValueRef (*outputs)[4],
+                  const LLVMValueRef *pos,
+                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
+                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                   LLVMValueRef samplers_ptr)
 {
    struct lp_build_tgsi_soa_context bld;
@@ -1448,12 +1364,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
    memset(&bld, 0, sizeof bld);
    lp_build_context_init(&bld.base, builder, type);
    bld.mask = mask;
-   bld.x = pos[0];
-   bld.y = pos[1];
-   bld.w = pos[3];
-   bld.a0_ptr = a0_ptr;
-   bld.dadx_ptr = dadx_ptr;
-   bld.dady_ptr = dady_ptr;
+   bld.pos = pos;
+   bld.inputs = inputs;
    bld.outputs = outputs;
    bld.consts_ptr = consts_ptr;
    bld.samplers_ptr = samplers_ptr;
@@ -1465,9 +1377,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
-            emit_declaration( &bld, &parse.FullToken.FullDeclaration );
-         }
+         /* Input already interpolated */
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index e639f9c..361b306 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -69,9 +69,11 @@
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_parse.h"
 #include "lp_bld_type.h"
+#include "lp_bld_const.h"
 #include "lp_bld_conv.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_depth.h"
+#include "lp_bld_interp.h"
 #include "lp_bld_tgsi.h"
 #include "lp_bld_alpha.h"
 #include "lp_bld_blend.h"
@@ -88,22 +90,16 @@ static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
 static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
 
 
-/**
- * Generate the position vectors.
- *
- * TODO: This should be called only once per fragment pipeline, for the first
- * quad, and the neighboring quad positions obtained by additions.
- *
- * Parameter x, y are the integer values with the quad upper left coordinates.
+/*
+ * Derive from the quad's upper left scalar coordinates the coordinates for
+ * all other quad pixels
  */
 static void
-generate_pos(LLVMBuilderRef builder,
-             LLVMValueRef x,
-             LLVMValueRef y,
-             LLVMValueRef a0_ptr,
-             LLVMValueRef dadx_ptr,
-             LLVMValueRef dady_ptr,
-             LLVMValueRef *pos)
+generate_pos0(LLVMBuilderRef builder,
+              LLVMValueRef x,
+              LLVMValueRef y,
+              LLVMValueRef *x0,
+              LLVMValueRef *y0)
 {
    LLVMTypeRef int_elem_type = LLVMInt32Type();
    LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE);
@@ -111,14 +107,8 @@ generate_pos(LLVMBuilderRef builder,
    LLVMTypeRef vec_type = LLVMVectorType(elem_type, QUAD_SIZE);
    LLVMValueRef x_offsets[QUAD_SIZE];
    LLVMValueRef y_offsets[QUAD_SIZE];
-   unsigned chan;
    unsigned i;
 
-   /*
-    * Derive from the quad's upper left scalar coordinates the coordinates for
-    * all other quad pixels
-    */
-
    x = lp_build_broadcast(builder, int_vec_type, x);
    y = lp_build_broadcast(builder, int_vec_type, y);
 
@@ -130,33 +120,8 @@ generate_pos(LLVMBuilderRef builder,
    x = LLVMBuildAdd(builder, x, LLVMConstVector(x_offsets, QUAD_SIZE), "");
    y = LLVMBuildAdd(builder, y, LLVMConstVector(y_offsets, QUAD_SIZE), "");
 
-   x = LLVMBuildSIToFP(builder, x, vec_type, "");
-   y = LLVMBuildSIToFP(builder, y, vec_type, "");
-
-   pos[0] = x;
-   pos[1] = y;
-
-   /* 
-    * Calculate z and w from the interpolation factors.
-    */
-
-   for(chan = 2; chan < NUM_CHANNELS; ++chan) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
-      LLVMValueRef a0   = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr,   &index, 1, ""), "");
-      LLVMValueRef dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
-      LLVMValueRef dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
-      LLVMValueRef res;
-      a0   = lp_build_broadcast(builder, vec_type, a0);
-      dadx = lp_build_broadcast(builder, vec_type, dadx);
-      dady = lp_build_broadcast(builder, vec_type, dady);
-      res = a0;
-      res = LLVMBuildAdd(builder, res, LLVMBuildMul(builder, dadx, x, ""), "");
-      res = LLVMBuildAdd(builder, res, LLVMBuildMul(builder, dady, y, ""), "");
-      pos[chan] = res;
-   }
-
-   for(chan = 0; chan < NUM_CHANNELS; ++chan)
-      lp_build_name(pos[chan], "pos.%c", "xyzw"[chan]);
+   *x0 = LLVMBuildSIToFP(builder, x, vec_type, "");
+   *y0 = LLVMBuildSIToFP(builder, y, vec_type, "");
 }
 
 
@@ -218,11 +183,7 @@ generate_fs(struct llvmpipe_context *lp,
             union lp_type type,
             LLVMValueRef context_ptr,
             unsigned i,
-            LLVMValueRef x,
-            LLVMValueRef y,
-            LLVMValueRef a0_ptr,
-            LLVMValueRef dadx_ptr,
-            LLVMValueRef dady_ptr,
+            const struct lp_build_interp_soa_context *interp,
             LLVMValueRef *pmask,
             LLVMValueRef *color,
             LLVMValueRef depth_ptr)
@@ -233,8 +194,8 @@ generate_fs(struct llvmpipe_context *lp,
    LLVMTypeRef int_vec_type;
    LLVMValueRef consts_ptr;
    LLVMValueRef samplers_ptr;
-   LLVMValueRef pos[NUM_CHANNELS];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   LLVMValueRef z = interp->pos[2];
    struct lp_build_mask_context mask;
    boolean early_depth_test;
    unsigned attrib;
@@ -247,8 +208,6 @@ generate_fs(struct llvmpipe_context *lp,
    consts_ptr = lp_jit_context_constants(builder, context_ptr);
    samplers_ptr = lp_jit_context_samplers(builder, context_ptr);
 
-   generate_pos(builder, x, y, a0_ptr, dadx_ptr, dady_ptr, pos);
-
    lp_build_mask_begin(&mask, builder, type, *pmask);
 
    early_depth_test =
@@ -260,14 +219,14 @@ generate_fs(struct llvmpipe_context *lp,
 
    if(early_depth_test)
       generate_depth(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
+                     type, &mask,
+                     z, depth_ptr);
 
    memset(outputs, 0, sizeof outputs);
 
    lp_build_tgsi_soa(builder, tokens, type, &mask,
-                     pos, a0_ptr, dadx_ptr, dady_ptr,
-                     consts_ptr, outputs, samplers_ptr);
+                     consts_ptr, interp->pos, interp->inputs,
+                     outputs, samplers_ptr);
 
    for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -300,7 +259,7 @@ generate_fs(struct llvmpipe_context *lp,
 
             case TGSI_SEMANTIC_POSITION:
                if(chan == 2)
-                  pos[2] = outputs[attrib][chan];
+                  z = outputs[attrib][chan];
                break;
             }
          }
@@ -309,8 +268,8 @@ generate_fs(struct llvmpipe_context *lp,
 
    if(!early_depth_test)
       generate_depth(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
+                     type, &mask,
+                     z, depth_ptr);
 
    lp_build_mask_end(&mask);
 
@@ -400,6 +359,9 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef depth_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
+   LLVMValueRef x0;
+   LLVMValueRef y0;
+   struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
    LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
    LLVMValueRef blend_mask;
@@ -516,14 +478,19 @@ generate_fragment(struct llvmpipe_context *lp,
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
+   generate_pos0(builder, x, y, &x0, &y0);
+
+   lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type,
+                            a0_ptr, dadx_ptr, dady_ptr,
+                            x0, y0, 2, 0);
+
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
       LLVMValueRef out_color[NUM_CHANNELS];
-      LLVMValueRef x_i;
       LLVMValueRef depth_ptr_i;
 
-      /* TODO: Reuse position interpolation */
-      x_i = LLVMBuildAdd(builder, x, LLVMConstInt(LLVMInt32Type(), 2*i, 0), "");
+      if(i != 0)
+         lp_build_interp_soa_update(&interp);
 
       fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
       depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
@@ -533,8 +500,7 @@ generate_fragment(struct llvmpipe_context *lp,
                   fs_type,
                   context_ptr,
                   i,
-                  x_i, y,
-                  a0_ptr, dadx_ptr, dady_ptr,
+                  &interp,
                   &fs_mask[i],
                   out_color,
                   depth_ptr_i);




More information about the mesa-commit mailing list