[Mesa-dev] [PATCH] softpipe: Take all lods into account when texture sampling.

Jose Fonseca jfonseca at vmware.com
Tue Jun 19 14:46:35 PDT 2012


Could you give more background on why this is necessary?

This will make software rendering slower, so I'd really like to avoid it on llvmpipe if at all possible.

Jose

----- Original Message -----
> This patch churns a lot because it needs to change 4-wide filters
> into
> single pixel filters, since each fragment may use a different filter.
> 
> The only case not entirely supported is the anisotropic filtering.
> Not sure what we want to do there, since a full quad is required by
> that filter.
> 
> Signed-off-by: Olivier Galibert <galibert at pobox.com>
> ---
>  src/gallium/drivers/softpipe/sp_tex_sample.c | 1383
>  ++++++++++++--------------
>  src/gallium/drivers/softpipe/sp_tex_sample.h |   28 +-
>  2 files changed, 645 insertions(+), 766 deletions(-)
> 
> piglit test that shows the problem is sleeping in the appropriate
> list.  llvmpipe fix is gonna be a tad more convoluted (I have to fix
> texturing in vertex shaders for a start :-)
> 
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index f29a6c7..292dc6e 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -129,313 +129,263 @@ repeat(int coord, unsigned size)
>   * \return  integer texture index
>   */
>  static void
> -wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4])
> +wrap_nearest_repeat(float s, unsigned size, int *icoord)
>  {
> -   uint ch;
>     /* s limited to [0,1) */
>     /* i limited to [0,size-1] */
> -   for (ch = 0; ch < 4; ch++) {
> -      int i = util_ifloor(s[ch] * size);
> -      icoord[ch] = repeat(i, size);
> -   }
> +   int i = util_ifloor(s * size);
> +   *icoord = repeat(i, size);
>  }
>  
>  
>  static void
> -wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4])
> +wrap_nearest_clamp(float s, unsigned size, int *icoord)
>  {
> -   uint ch;
>     /* s limited to [0,1] */
>     /* i limited to [0,size-1] */
> -   for (ch = 0; ch < 4; ch++) {
> -      if (s[ch] <= 0.0F)
> -         icoord[ch] = 0;
> -      else if (s[ch] >= 1.0F)
> -         icoord[ch] = size - 1;
> -      else
> -         icoord[ch] = util_ifloor(s[ch] * size);
> -   }
> +   if (s <= 0.0F)
> +      *icoord = 0;
> +   else if (s >= 1.0F)
> +      *icoord = size - 1;
> +   else
> +      *icoord = util_ifloor(s * size);
>  }
>  
>  
>  static void
> -wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int
> icoord[4])
> +wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
>  {
> -   uint ch;
>     /* s limited to [min,max] */
>     /* i limited to [0, size-1] */
>     const float min = 1.0F / (2.0F * size);
>     const float max = 1.0F - min;
> -   for (ch = 0; ch < 4; ch++) {
> -      if (s[ch] < min)
> -         icoord[ch] = 0;
> -      else if (s[ch] > max)
> -         icoord[ch] = size - 1;
> -      else
> -         icoord[ch] = util_ifloor(s[ch] * size);
> -   }
> +   if (s < min)
> +      *icoord = 0;
> +   else if (s > max)
> +      *icoord = size - 1;
> +   else
> +      *icoord = util_ifloor(s * size);
>  }
>  
>  
>  static void
> -wrap_nearest_clamp_to_border(const float s[4], unsigned size, int
> icoord[4])
> +wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
>  {
> -   uint ch;
>     /* s limited to [min,max] */
>     /* i limited to [-1, size] */
>     const float min = -1.0F / (2.0F * size);
>     const float max = 1.0F - min;
> -   for (ch = 0; ch < 4; ch++) {
> -      if (s[ch] <= min)
> -         icoord[ch] = -1;
> -      else if (s[ch] >= max)
> -         icoord[ch] = size;
> -      else
> -         icoord[ch] = util_ifloor(s[ch] * size);
> -   }
> +   if (s <= min)
> +      *icoord = -1;
> +   else if (s >= max)
> +      *icoord = size;
> +   else
> +      *icoord = util_ifloor(s * size);
>  }
>  
>  
>  static void
> -wrap_nearest_mirror_repeat(const float s[4], unsigned size, int
> icoord[4])
> +wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
>  {
> -   uint ch;
>     const float min = 1.0F / (2.0F * size);
>     const float max = 1.0F - min;
> -   for (ch = 0; ch < 4; ch++) {
> -      const int flr = util_ifloor(s[ch]);
> -      float u = frac(s[ch]);
> -      if (flr & 1)
> -         u = 1.0F - u;
> -      if (u < min)
> -         icoord[ch] = 0;
> -      else if (u > max)
> -         icoord[ch] = size - 1;
> -      else
> -         icoord[ch] = util_ifloor(u * size);
> -   }
> +   const int flr = util_ifloor(s);
> +   float u = frac(s);
> +   if (flr & 1)
> +      u = 1.0F - u;
> +   if (u < min)
> +      *icoord = 0;
> +   else if (u > max)
> +      *icoord = size - 1;
> +   else
> +      *icoord = util_ifloor(u * size);
>  }
>  
>  
>  static void
> -wrap_nearest_mirror_clamp(const float s[4], unsigned size, int
> icoord[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      /* s limited to [0,1] */
> -      /* i limited to [0,size-1] */
> -      const float u = fabsf(s[ch]);
> -      if (u <= 0.0F)
> -         icoord[ch] = 0;
> -      else if (u >= 1.0F)
> -         icoord[ch] = size - 1;
> -      else
> -         icoord[ch] = util_ifloor(u * size);
> -   }
> +wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
> +{
> +   /* s limited to [0,1] */
> +   /* i limited to [0,size-1] */
> +   const float u = fabsf(s);
> +   if (u <= 0.0F)
> +      *icoord = 0;
> +   else if (u >= 1.0F)
> +      *icoord = size - 1;
> +   else
> +      *icoord = util_ifloor(u * size);
>  }
>  
>  
>  static void
> -wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size,
> -                                  int icoord[4])
> +wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int
> *icoord)
>  {
> -   uint ch;
>     /* s limited to [min,max] */
>     /* i limited to [0, size-1] */
>     const float min = 1.0F / (2.0F * size);
>     const float max = 1.0F - min;
> -   for (ch = 0; ch < 4; ch++) {
> -      const float u = fabsf(s[ch]);
> -      if (u < min)
> -         icoord[ch] = 0;
> -      else if (u > max)
> -         icoord[ch] = size - 1;
> -      else
> -         icoord[ch] = util_ifloor(u * size);
> -   }
> +   const float u = fabsf(s);
> +   if (u < min)
> +      *icoord = 0;
> +   else if (u > max)
> +      *icoord = size - 1;
> +   else
> +      *icoord = util_ifloor(u * size);
>  }
>  
>  
>  static void
> -wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size,
> -                                    int icoord[4])
> +wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int
> *icoord)
>  {
> -   uint ch;
>     /* s limited to [min,max] */
>     /* i limited to [0, size-1] */
>     const float min = -1.0F / (2.0F * size);
>     const float max = 1.0F - min;
> -   for (ch = 0; ch < 4; ch++) {
> -      const float u = fabsf(s[ch]);
> -      if (u < min)
> -         icoord[ch] = -1;
> -      else if (u > max)
> -         icoord[ch] = size;
> -      else
> -         icoord[ch] = util_ifloor(u * size);
> -   }
> +   const float u = fabsf(s);
> +   if (u < min)
> +      *icoord = -1;
> +   else if (u > max)
> +      *icoord = size;
> +   else
> +      *icoord = util_ifloor(u * size);
>  }
>  
>  
>  /**
> - * Used to compute texel locations for linear sampling for four
> texcoords.
> + * Used to compute texel locations for linear sampling
>   * \param wrapMode  PIPE_TEX_WRAP_x
> - * \param s  the texcoords
> + * \param s  the texcoord
>   * \param size  the texture image size
> - * \param icoord0  returns first texture indexes
> - * \param icoord1  returns second texture indexes (usually icoord0 +
> 1)
> - * \param w  returns blend factor/weight between texture indexes
> - * \param icoord  returns the computed integer texture coords
> + * \param icoord0  returns first texture index
> + * \param icoord1  returns second texture index (usually icoord0 +
> 1)
> + * \param w  returns blend factor/weight between texture indices
> + * \param icoord  returns the computed integer texture coord
>   */
>  static void
> -wrap_linear_repeat(const float s[4], unsigned size,
> -                   int icoord0[4], int icoord1[4], float w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = s[ch] * size - 0.5F;
> -      icoord0[ch] = repeat(util_ifloor(u), size);
> -      icoord1[ch] = repeat(icoord0[ch] + 1, size);
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_repeat(float s, unsigned size,
> +                   int *icoord0, int *icoord1, float *w)
> +{
> +   float u = s * size - 0.5F;
> +   *icoord0 = repeat(util_ifloor(u), size);
> +   *icoord1 = repeat(*icoord0 + 1, size);
> +   *w = frac(u);
>  }
>  
>  
>  static void
> -wrap_linear_clamp(const float s[4], unsigned size,
> -                  int icoord0[4], int icoord1[4], float w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = CLAMP(s[ch], 0.0F, 1.0F);
> -      u = u * size - 0.5f;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_clamp(float s, unsigned size,
> +                  int *icoord0, int *icoord1, float *w)
> +{
> +   float u = CLAMP(s, 0.0F, 1.0F);
> +   u = u * size - 0.5f;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   *w = frac(u);
>  }
>  
>  
>  static void
> -wrap_linear_clamp_to_edge(const float s[4], unsigned size,
> -                          int icoord0[4], int icoord1[4], float
> w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = CLAMP(s[ch], 0.0F, 1.0F);
> -      u = u * size - 0.5f;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      if (icoord0[ch] < 0)
> -         icoord0[ch] = 0;
> -      if (icoord1[ch] >= (int) size)
> -         icoord1[ch] = size - 1;
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_clamp_to_edge(float s, unsigned size,
> +                          int *icoord0, int *icoord1, float *w)
> +{
> +   float u = CLAMP(s, 0.0F, 1.0F);
> +   u = u * size - 0.5f;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   if (*icoord0 < 0)
> +      *icoord0 = 0;
> +   if (*icoord1 >= (int) size)
> +      *icoord1 = size - 1;
> +   *w = frac(u);
>  }
>  
>  
>  static void
> -wrap_linear_clamp_to_border(const float s[4], unsigned size,
> -                            int icoord0[4], int icoord1[4], float
> w[4])
> +wrap_linear_clamp_to_border(float s, unsigned size,
> +                            int *icoord0, int *icoord1, float *w)
>  {
>     const float min = -1.0F / (2.0F * size);
>     const float max = 1.0F - min;
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = CLAMP(s[ch], min, max);
> -      u = u * size - 0.5f;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      w[ch] = frac(u);
> -   }
> +   float u = CLAMP(s, min, max);
> +   u = u * size - 0.5f;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   *w = frac(u);
>  }
>  
>  
>  static void
> -wrap_linear_mirror_repeat(const float s[4], unsigned size,
> -                          int icoord0[4], int icoord1[4], float
> w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      const int flr = util_ifloor(s[ch]);
> -      float u = frac(s[ch]);
> -      if (flr & 1)
> -         u = 1.0F - u;
> -      u = u * size - 0.5F;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      if (icoord0[ch] < 0)
> -         icoord0[ch] = 0;
> -      if (icoord1[ch] >= (int) size)
> -         icoord1[ch] = size - 1;
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_mirror_repeat(float s, unsigned size,
> +                          int *icoord0, int *icoord1, float *w)
> +{
> +   const int flr = util_ifloor(s);
> +   float u = frac(s);
> +   if (flr & 1)
> +      u = 1.0F - u;
> +   u = u * size - 0.5F;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   if (*icoord0 < 0)
> +      *icoord0 = 0;
> +   if (*icoord1 >= (int) size)
> +      *icoord1 = size - 1;
> +   *w = frac(u);
>  }
>  
>  
>  static void
> -wrap_linear_mirror_clamp(const float s[4], unsigned size,
> -                         int icoord0[4], int icoord1[4], float w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = fabsf(s[ch]);
> -      if (u >= 1.0F)
> -         u = (float) size;
> -      else
> -         u *= size;
> -      u -= 0.5F;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_mirror_clamp(float s, unsigned size,
> +                         int *icoord0, int *icoord1, float *w)
> +{
> +   float u = fabsf(s);
> +   if (u >= 1.0F)
> +      u = (float) size;
> +   else
> +      u *= size;
> +   u -= 0.5F;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   *w = frac(u);
>  }
>  
>  
>  static void
> -wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size,
> -                                 int icoord0[4], int icoord1[4],
> float w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = fabsf(s[ch]);
> -      if (u >= 1.0F)
> -         u = (float) size;
> -      else
> -         u *= size;
> -      u -= 0.5F;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      if (icoord0[ch] < 0)
> -         icoord0[ch] = 0;
> -      if (icoord1[ch] >= (int) size)
> -         icoord1[ch] = size - 1;
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
> +                                 int *icoord0, int *icoord1, float
> *w)
> +{
> +   float u = fabsf(s);
> +   if (u >= 1.0F)
> +      u = (float) size;
> +   else
> +      u *= size;
> +   u -= 0.5F;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   if (*icoord0 < 0)
> +      *icoord0 = 0;
> +   if (*icoord1 >= (int) size)
> +      *icoord1 = size - 1;
> +   *w = frac(u);
>  }
>  
>  
>  static void
> -wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size,
> -                                   int icoord0[4], int icoord1[4],
> float w[4])
> +wrap_linear_mirror_clamp_to_border(float s, unsigned size,
> +                                   int *icoord0, int *icoord1, float
> *w)
>  {
>     const float min = -1.0F / (2.0F * size);
>     const float max = 1.0F - min;
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = fabsf(s[ch]);
> -      if (u <= min)
> -         u = min * size;
> -      else if (u >= max)
> -         u = max * size;
> -      else
> -         u *= size;
> -      u -= 0.5F;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      w[ch] = frac(u);
> -   }
> +   float u = fabsf(s);
> +   if (u <= min)
> +      u = min * size;
> +   else if (u >= max)
> +      u = max * size;
> +   else
> +      u *= size;
> +   u -= 0.5F;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   *w = frac(u);
>  }
>  
>  
> @@ -443,13 +393,10 @@ wrap_linear_mirror_clamp_to_border(const float
> s[4], unsigned size,
>   * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
>   */
>  static void
> -wrap_nearest_unorm_clamp(const float s[4], unsigned size, int
> icoord[4])
> +wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
>  {
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      int i = util_ifloor(s[ch]);
> -      icoord[ch]= CLAMP(i, 0, (int) size-1);
> -   }
> +   int i = util_ifloor(s);
> +   *icoord = CLAMP(i, 0, (int) size-1);
>  }
>  
>  
> @@ -457,13 +404,9 @@ wrap_nearest_unorm_clamp(const float s[4],
> unsigned size, int icoord[4])
>   * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized
>   coords.
>   */
>  static void
> -wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size,
> -                                   int icoord[4])
> +wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int
> *icoord)
>  {
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      icoord[ch]= util_ifloor( CLAMP(s[ch], -0.5F, (float) size +
> 0.5F) );
> -   }
> +   *icoord = util_ifloor( CLAMP(s, -0.5F, (float) size + 0.5F) );
>  }
>  
>  
> @@ -471,13 +414,9 @@ wrap_nearest_unorm_clamp_to_border(const float
> s[4], unsigned size,
>   * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized
>   coords.
>   */
>  static void
> -wrap_nearest_unorm_clamp_to_edge(const float s[4], unsigned size,
> -                                 int icoord[4])
> +wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int
> *icoord)
>  {
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size -
> 0.5F) );
> -   }
> +   *icoord = util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
>  }
>  
>  
> @@ -485,17 +424,14 @@ wrap_nearest_unorm_clamp_to_edge(const float
> s[4], unsigned size,
>   * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
>   */
>  static void
> -wrap_linear_unorm_clamp(const float s[4], unsigned size,
> -                        int icoord0[4], int icoord1[4], float w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      /* Not exactly what the spec says, but it matches NVIDIA
> output */
> -      float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_unorm_clamp(float s, unsigned size,
> +                        int *icoord0, int *icoord1, float *w)
> +{
> +   /* Not exactly what the spec says, but it matches NVIDIA output
> */
> +   float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   *w = frac(u);
>  }
>  
>  
> @@ -503,19 +439,16 @@ wrap_linear_unorm_clamp(const float s[4],
> unsigned size,
>   * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized
>   coords.
>   */
>  static void
> -wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size,
> -                                  int icoord0[4], int icoord1[4],
> float w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = CLAMP(s[ch], -0.5F, (float) size + 0.5F);
> -      u -= 0.5F;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      if (icoord1[ch] > (int) size - 1)
> -         icoord1[ch] = size - 1;
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_unorm_clamp_to_border(float s, unsigned size,
> +                                  int *icoord0, int *icoord1, float
> *w)
> +{
> +   float u = CLAMP(s, -0.5F, (float) size + 0.5F);
> +   u -= 0.5F;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   if (*icoord1 > (int) size - 1)
> +      *icoord1 = size - 1;
> +   *w = frac(u);
>  }
>  
>  
> @@ -523,19 +456,16 @@ wrap_linear_unorm_clamp_to_border(const float
> s[4], unsigned size,
>   * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized
>   coords.
>   */
>  static void
> -wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size,
> -                                int icoord0[4], int icoord1[4],
> float w[4])
> -{
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      float u = CLAMP(s[ch], +0.5F, (float) size - 0.5F);
> -      u -= 0.5F;
> -      icoord0[ch] = util_ifloor(u);
> -      icoord1[ch] = icoord0[ch] + 1;
> -      if (icoord1[ch] > (int) size - 1)
> -         icoord1[ch] = size - 1;
> -      w[ch] = frac(u);
> -   }
> +wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
> +                                int *icoord0, int *icoord1, float
> *w)
> +{
> +   float u = CLAMP(s, +0.5F, (float) size - 0.5F);
> +   u -= 0.5F;
> +   *icoord0 = util_ifloor(u);
> +   *icoord1 = *icoord0 + 1;
> +   if (*icoord1 > (int) size - 1)
> +      *icoord1 = size - 1;
> +   *w = frac(u);
>  }
>  
>  
> @@ -543,13 +473,10 @@ wrap_linear_unorm_clamp_to_edge(const float
> s[4], unsigned size,
>   * Do coordinate to array index conversion.  For array textures.
>   */
>  static INLINE void
> -wrap_array_layer(const float coord[4], unsigned size, int layer[4])
> +wrap_array_layer(float coord, unsigned size, int *layer)
>  {
> -   uint ch;
> -   for (ch = 0; ch < 4; ch++) {
> -      int c = util_ifloor(coord[ch] + 0.5F);
> -      layer[ch] = CLAMP(c, 0, size - 1);
> -   }
> +   int c = util_ifloor(coord + 0.5F);
> +   *layer = CLAMP(c, 0, size - 1);
>  }
>  
>  
> @@ -640,7 +567,7 @@ compute_lambda_vert(const struct
> sp_sampler_variant *samp,
>   * \param rgba  the quad to put the texel/color into
>   *
>   * XXX maybe move this into sp_tex_tile_cache.c and merge with the
> - * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of
> 1...
> + * sp_get_cached_tile_tex() function.
>   */
>  
>  
> @@ -827,7 +754,16 @@ pot_level_size(unsigned base_pot, unsigned
> level)
>  
>  
>  static void
> -print_sample(const char *function, float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +print_sample(const char *function, const float *rgba)
> +{
> +   debug_printf("%s %g %g %g %g\n",
> +                function,
> +                rgba[0], rgba[TGSI_NUM_CHANNELS],
> rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
> +}
> +
> +
> +static void
> +print_sample_4(const char *function, float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
>  {
>     debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g
>     %g\n",
>                  function,
> @@ -837,64 +773,61 @@ print_sample(const char *function, float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]
>                  rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
>  }
>  
> -
>  /* Some image-filter fastpaths:
>   */
>  static INLINE void
>  img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
> -                                const float s[TGSI_QUAD_SIZE],
> -                                const float t[TGSI_QUAD_SIZE],
> -                                const float p[TGSI_QUAD_SIZE],
> -                                const float c0[TGSI_QUAD_SIZE],
> +                                float s,
> +                                float t,
> +                                float p,
> +				unsigned level,
> +                                unsigned face_id,
>                                  enum tgsi_sampler_control control,
> -                                float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                                float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
> -   unsigned  j;
> -   unsigned level = samp->level;
>     unsigned xpot = pot_level_size(samp->xpot, level);
>     unsigned ypot = pot_level_size(samp->ypot, level);
>     unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE,
>     xpot) - 1; */
>     unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE,
>     ypot) - 1; */
>     union tex_tile_address addr;
> +   int c;
>  
> -   addr.value = 0;
> -   addr.bits.level = samp->level;
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      int c;
>  
> -      float u = s[j] * xpot - 0.5F;
> -      float v = t[j] * ypot - 0.5F;
> +   float u = s * xpot - 0.5F;
> +   float v = t * ypot - 0.5F;
>  
> -      int uflr = util_ifloor(u);
> -      int vflr = util_ifloor(v);
> +   int uflr = util_ifloor(u);
> +   int vflr = util_ifloor(v);
>  
> -      float xw = u - (float)uflr;
> -      float yw = v - (float)vflr;
> +   float xw = u - (float)uflr;
> +   float yw = v - (float)vflr;
>  
> -      int x0 = uflr & (xpot - 1);
> -      int y0 = vflr & (ypot - 1);
> +   int x0 = uflr & (xpot - 1);
> +   int y0 = vflr & (ypot - 1);
>  
> -      const float *tx[4];
> +   const float *tx[4];
>        
> -      /* Can we fetch all four at once:
> -       */
> -      if (x0 < xmax && y0 < ymax) {
> -         get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0,
> tx);
> -      }
> -      else {
> -         unsigned x1 = (x0 + 1) & (xpot - 1);
> -         unsigned y1 = (y0 + 1) & (ypot - 1);
> -         get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1,
> tx);
> -      }
> +   addr.value = 0;
> +   addr.bits.level = level;
>  
> -      /* interpolate R, G, B, A */
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = lerp_2d(xw, yw,
> -                              tx[0][c], tx[1][c],
> -                              tx[2][c], tx[3][c]);
> -      }
> +   /* Can we fetch all four at once:
> +    */
> +   if (x0 < xmax && y0 < ymax) {
> +      get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0,
> tx);
> +   }
> +   else {
> +      unsigned x1 = (x0 + 1) & (xpot - 1);
> +      unsigned y1 = (y0 + 1) & (ypot - 1);
> +      get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
> +   }
> +
> +   /* interpolate R, G, B, A */
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++) {
> +      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> +                                       tx[0][c], tx[1][c],
> +                                       tx[2][c], tx[3][c]);
>     }
>  
>     if (DEBUG_TEX) {
> @@ -905,41 +838,36 @@ img_filter_2d_linear_repeat_POT(struct
> tgsi_sampler *tgsi_sampler,
>  
>  static INLINE void
>  img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
> -                                 const float s[TGSI_QUAD_SIZE],
> -                                 const float t[TGSI_QUAD_SIZE],
> -                                 const float p[TGSI_QUAD_SIZE],
> -                                 const float c0[TGSI_QUAD_SIZE],
> +                                 float s,
> +                                 float t,
> +                                 float p,
> +                                 unsigned level,
> +                                 unsigned face_id,
>                                   enum tgsi_sampler_control control,
> -                                 float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                                 float rgba[TGSI_QUAD_SIZE])
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
> -   unsigned  j;
> -   unsigned level = samp->level;
>     unsigned xpot = pot_level_size(samp->xpot, level);
>     unsigned ypot = pot_level_size(samp->ypot, level);
> +   const float *out;
>     union tex_tile_address addr;
> +   int c;
>  
> -   addr.value = 0;
> -   addr.bits.level = samp->level;
> -
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      int c;
> -
> -      float u = s[j] * xpot;
> -      float v = t[j] * ypot;
> +   float u = s * xpot;
> +   float v = t * ypot;
>  
> -      int uflr = util_ifloor(u);
> -      int vflr = util_ifloor(v);
> +   int uflr = util_ifloor(u);
> +   int vflr = util_ifloor(v);
>  
> -      int x0 = uflr & (xpot - 1);
> -      int y0 = vflr & (ypot - 1);
> +   int x0 = uflr & (xpot - 1);
> +   int y0 = vflr & (ypot - 1);
>  
> -      const float *out = get_texel_2d_no_border(samp, addr, x0, y0);
> +   addr.value = 0;
> +   addr.bits.level = level;
>  
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = out[c];
> -      }
> -   }
> +   out = get_texel_2d_no_border(samp, addr, x0, y0);
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = out[c];
>  
>     if (DEBUG_TEX) {
>        print_sample(__FUNCTION__, rgba);
> @@ -949,50 +877,44 @@ img_filter_2d_nearest_repeat_POT(struct
> tgsi_sampler *tgsi_sampler,
>  
>  static INLINE void
>  img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
> -                                const float s[TGSI_QUAD_SIZE],
> -                                const float t[TGSI_QUAD_SIZE],
> -                                const float p[TGSI_QUAD_SIZE],
> -                                const float c0[TGSI_QUAD_SIZE],
> +                                float s,
> +                                float t,
> +                                float p,
> +                                unsigned level,
> +                                unsigned face_id,
>                                  enum tgsi_sampler_control control,
> -                                float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                                float rgba[TGSI_QUAD_SIZE])
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
> -   unsigned  j;
> -   unsigned level = samp->level;
>     unsigned xpot = pot_level_size(samp->xpot, level);
>     unsigned ypot = pot_level_size(samp->ypot, level);
>     union tex_tile_address addr;
> +   int c;
>  
> -   addr.value = 0;
> -   addr.bits.level = samp->level;
> -
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      int c;
> -
> -      float u = s[j] * xpot;
> -      float v = t[j] * ypot;
> -
> -      int x0, y0;
> -      const float *out;
> -
> -      x0 = util_ifloor(u);
> -      if (x0 < 0)
> -         x0 = 0;
> -      else if (x0 > xpot - 1)
> -         x0 = xpot - 1;
> +   float u = s * xpot;
> +   float v = t * ypot;
>  
> -      y0 = util_ifloor(v);
> -      if (y0 < 0)
> -         y0 = 0;
> -      else if (y0 > ypot - 1)
> -         y0 = ypot - 1;
> -
> -      out = get_texel_2d_no_border(samp, addr, x0, y0);
> +   int x0, y0;
> +   const float *out;
>  
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = out[c];
> -      }
> -   }
> +   addr.value = 0;
> +   addr.bits.level = level;
> +
> +   x0 = util_ifloor(u);
> +   if (x0 < 0)
> +      x0 = 0;
> +   else if (x0 > xpot - 1)
> +      x0 = xpot - 1;
> +
> +   y0 = util_ifloor(v);
> +   if (y0 < 0)
> +      y0 = 0;
> +   else if (y0 > ypot - 1)
> +      y0 = ypot - 1;
> +
> +   out = get_texel_2d_no_border(samp, addr, x0, y0);
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = out[c];
>  
>     if (DEBUG_TEX) {
>        print_sample(__FUNCTION__, rgba);
> @@ -1002,37 +924,34 @@ img_filter_2d_nearest_clamp_POT(struct
> tgsi_sampler *tgsi_sampler,
>  
>  static void
>  img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
> -                        const float s[TGSI_QUAD_SIZE],
> -                        const float t[TGSI_QUAD_SIZE],
> -                        const float p[TGSI_QUAD_SIZE],
> -                        const float c0[TGSI_QUAD_SIZE],
> -                        enum tgsi_sampler_control control,
> -                        float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                      float s,
> +                      float t,
> +                      float p,
> +                      unsigned level,
> +                      unsigned face_id,
> +                      enum tgsi_sampler_control control,
> +                      float rgba[TGSI_QUAD_SIZE])
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width;
> -   int x[4];
> +   int x;
>     union tex_tile_address addr;
> +   const float *out;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> +   width = u_minify(texture->width0, level);
>  
>     assert(width > 0);
>  
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->nearest_texcoord_s(s, width, x);
> +   samp->nearest_texcoord_s(s, width, &x);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *out = get_texel_2d(samp, addr, x[j], 0);
> -      int c;
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = out[c];
> -      }
> -   }
> +   out = get_texel_2d(samp, addr, x, 0);
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = out[c];
>  
>     if (DEBUG_TEX) {
>        print_sample(__FUNCTION__, rgba);
> @@ -1042,38 +961,35 @@ img_filter_1d_nearest(struct tgsi_sampler
> *tgsi_sampler,
>  
>  static void
>  img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler,
> -                            const float s[TGSI_QUAD_SIZE],
> -                            const float t[TGSI_QUAD_SIZE],
> -                            const float p[TGSI_QUAD_SIZE],
> -                            const float c0[TGSI_QUAD_SIZE],
> +                            float s,
> +                            float t,
> +                            float p,
> +                            unsigned level,
> +                            unsigned face_id,
>                              enum tgsi_sampler_control control,
> -                            float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                            float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width;
> -   int x[4], layer[4];
> +   int x, layer;
>     union tex_tile_address addr;
> +   const float *out;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> +   width = u_minify(texture->width0, level);
>  
>     assert(width > 0);
>  
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->nearest_texcoord_s(s, width, x);
> -   wrap_array_layer(t, texture->array_size, layer);
> +   samp->nearest_texcoord_s(s, width, &x);
> +   wrap_array_layer(t, texture->array_size, &layer);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *out = get_texel_1d_array(samp, addr, x[j],
> layer[j]);
> -      int c;
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = out[c];
> -      }
> -   }
> +   out = get_texel_1d_array(samp, addr, x, layer);
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = out[c];
>  
>     if (DEBUG_TEX) {
>        print_sample(__FUNCTION__, rgba);
> @@ -1083,41 +999,37 @@ img_filter_1d_array_nearest(struct
> tgsi_sampler *tgsi_sampler,
>  
>  static void
>  img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
> -                      const float s[TGSI_QUAD_SIZE],
> -                      const float t[TGSI_QUAD_SIZE],
> -                      const float p[TGSI_QUAD_SIZE],
> -                      const float c0[TGSI_QUAD_SIZE],
> +                      float s,
> +                      float t,
> +                      float p,
> +                      unsigned level,
> +                      unsigned face_id,
>                        enum tgsi_sampler_control control,
> -                      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                      float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width, height;
> -   int x[4], y[4];
> +   int x, y;
>     union tex_tile_address addr;
> +   const float *out;
> +   int c;
>  
> -
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> -   height = u_minify(texture->height0, level0);
> +   width = u_minify(texture->width0, level);
> +   height = u_minify(texture->height0, level);
>  
>     assert(width > 0);
>     assert(height > 0);
>   
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->nearest_texcoord_s(s, width, x);
> -   samp->nearest_texcoord_t(t, height, y);
> +   samp->nearest_texcoord_s(s, width, &x);
> +   samp->nearest_texcoord_t(t, height, &y);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *out = get_texel_2d(samp, addr, x[j], y[j]);
> -      int c;
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = out[c];
> -      }
> -   }
> +   out = get_texel_2d(samp, addr, x, y);
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = out[c];
>  
>     if (DEBUG_TEX) {
>        print_sample(__FUNCTION__, rgba);
> @@ -1127,41 +1039,38 @@ img_filter_2d_nearest(struct tgsi_sampler
> *tgsi_sampler,
>  
>  static void
>  img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler,
> -                            const float s[TGSI_QUAD_SIZE],
> -                            const float t[TGSI_QUAD_SIZE],
> -                            const float p[TGSI_QUAD_SIZE],
> -                            const float c0[TGSI_QUAD_SIZE],
> +                            float s,
> +                            float t,
> +                            float p,
> +                            unsigned level,
> +                            unsigned face_id,
>                              enum tgsi_sampler_control control,
> -                            float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                            float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width, height;
> -   int x[4], y[4], layer[4];
> +   int x, y, layer;
>     union tex_tile_address addr;
> +   const float *out;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> -   height = u_minify(texture->height0, level0);
> +   width = u_minify(texture->width0, level);
> +   height = u_minify(texture->height0, level);
>  
>     assert(width > 0);
>     assert(height > 0);
>   
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->nearest_texcoord_s(s, width, x);
> -   samp->nearest_texcoord_t(t, height, y);
> -   wrap_array_layer(p, texture->array_size, layer);
> +   samp->nearest_texcoord_s(s, width, &x);
> +   samp->nearest_texcoord_t(t, height, &y);
> +   wrap_array_layer(p, texture->array_size, &layer);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *out = get_texel_2d_array(samp, addr, x[j], y[j],
> layer[j]);
> -      int c;
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = out[c];
> -      }
> -   }
> +   out = get_texel_2d_array(samp, addr, x, y, layer);
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = out[c];
>  
>     if (DEBUG_TEX) {
>        print_sample(__FUNCTION__, rgba);
> @@ -1179,41 +1088,37 @@ face(union tex_tile_address addr, unsigned
> face )
>  
>  static void
>  img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
> -                        const float s[TGSI_QUAD_SIZE],
> -                        const float t[TGSI_QUAD_SIZE],
> -                        const float p[TGSI_QUAD_SIZE],
> -                        const float c0[TGSI_QUAD_SIZE],
> +                        float s,
> +                        float t,
> +                        float p,
> +                        unsigned level,
> +                        unsigned face_id,
>                          enum tgsi_sampler_control control,
> -                        float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                        float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   const unsigned *faces = samp->faces; /* zero when not
> cube-mapping */
> -   unsigned level0, j;
>     int width, height;
> -   int x[4], y[4];
> +   int x, y;
>     union tex_tile_address addr;
> +   const float *out;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> -   height = u_minify(texture->height0, level0);
> +   width = u_minify(texture->width0, level);
> +   height = u_minify(texture->height0, level);
>  
>     assert(width > 0);
>     assert(height > 0);
>   
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->nearest_texcoord_s(s, width, x);
> -   samp->nearest_texcoord_t(t, height, y);
> +   samp->nearest_texcoord_s(s, width, &x);
> +   samp->nearest_texcoord_t(t, height, &y);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *out = get_texel_2d(samp, face(addr, faces[j]),
> x[j], y[j]);
> -      int c;
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = out[c];
> -      }
> -   }
> +   out = get_texel_2d(samp, face(addr, face_id), x, y);
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = out[c];
>  
>     if (DEBUG_TEX) {
>        print_sample(__FUNCTION__, rgba);
> @@ -1223,326 +1128,304 @@ img_filter_cube_nearest(struct tgsi_sampler
> *tgsi_sampler,
>  
>  static void
>  img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
> -                      const float s[TGSI_QUAD_SIZE],
> -                      const float t[TGSI_QUAD_SIZE],
> -                      const float p[TGSI_QUAD_SIZE],
> -                      const float c0[TGSI_QUAD_SIZE],
> +                      float s,
> +                      float t,
> +                      float p,
> +                      unsigned level,
> +                      unsigned face_id,
>                        enum tgsi_sampler_control control,
> -                      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                      float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width, height, depth;
> -   int x[4], y[4], z[4];
> +   int x, y, z;
>     union tex_tile_address addr;
> +   const float *out;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> -   height = u_minify(texture->height0, level0);
> -   depth = u_minify(texture->depth0, level0);
> +   width = u_minify(texture->width0, level);
> +   height = u_minify(texture->height0, level);
> +   depth = u_minify(texture->depth0, level);
>  
>     assert(width > 0);
>     assert(height > 0);
>     assert(depth > 0);
>  
> -   samp->nearest_texcoord_s(s, width,  x);
> -   samp->nearest_texcoord_t(t, height, y);
> -   samp->nearest_texcoord_p(p, depth,  z);
> +   samp->nearest_texcoord_s(s, width,  &x);
> +   samp->nearest_texcoord_t(t, height, &y);
> +   samp->nearest_texcoord_p(p, depth,  &z);
>  
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]);
> -      int c;
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = out[c];
> -      }
> -   }
> +   out = get_texel_3d(samp, addr, x, y, z);
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = out[c];
>  }
>  
>  
>  static void
>  img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
> -                     const float s[TGSI_QUAD_SIZE],
> -                     const float t[TGSI_QUAD_SIZE],
> -                     const float p[TGSI_QUAD_SIZE],
> -                     const float c0[TGSI_QUAD_SIZE],
> +                     float s,
> +                     float t,
> +                     float p,
> +                     unsigned level,
> +                     unsigned face_id,
>                       enum tgsi_sampler_control control,
> -                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                     float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width;
> -   int x0[4], x1[4];
> -   float xw[4]; /* weights */
> +   int x0, x1;
> +   float xw; /* weights */
>     union tex_tile_address addr;
> +   const float *tx0, *tx1;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> +   width = u_minify(texture->width0, level);
>  
>     assert(width > 0);
>  
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->linear_texcoord_s(s, width, x0, x1, xw);
> +   samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *tx0 = get_texel_2d(samp, addr, x0[j], 0);
> -      const float *tx1 = get_texel_2d(samp, addr, x1[j], 0);
> -      int c;
> +   tx0 = get_texel_2d(samp, addr, x0, 0);
> +   tx1 = get_texel_2d(samp, addr, x1, 0);
>  
> -      /* interpolate R, G, B, A */
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
> -      }
> -   }
> +   /* interpolate R, G, B, A */
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
>  }
>  
>  
>  static void
>  img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler,
> -                           const float s[TGSI_QUAD_SIZE],
> -                           const float t[TGSI_QUAD_SIZE],
> -                           const float p[TGSI_QUAD_SIZE],
> -                           const float c0[TGSI_QUAD_SIZE],
> +                           float s,
> +                           float t,
> +                           float p,
> +                           unsigned level,
> +                           unsigned face_id,
>                             enum tgsi_sampler_control control,
> -                           float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                           float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width;
> -   int x0[4], x1[4], layer[4];
> -   float xw[4]; /* weights */
> +   int x0, x1, layer;
> +   float xw; /* weights */
>     union tex_tile_address addr;
> +   const float *tx0, *tx1;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> +   width = u_minify(texture->width0, level);
>  
>     assert(width > 0);
>  
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->linear_texcoord_s(s, width, x0, x1, xw);
> -   wrap_array_layer(t, texture->array_size, layer);
> +   samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
> +   wrap_array_layer(t, texture->array_size, &layer);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *tx0 = get_texel_1d_array(samp, addr, x0[j],
> layer[j]);
> -      const float *tx1 = get_texel_1d_array(samp, addr, x1[j],
> layer[j]);
> -      int c;
> +   tx0 = get_texel_1d_array(samp, addr, x0, layer);
> +   tx1 = get_texel_1d_array(samp, addr, x1, layer);
>  
> -      /* interpolate R, G, B, A */
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
> -      }
> -   }
> +   /* interpolate R, G, B, A */
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
>  }
>  
>  
>  static void
>  img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
> -                     const float s[TGSI_QUAD_SIZE],
> -                     const float t[TGSI_QUAD_SIZE],
> -                     const float p[TGSI_QUAD_SIZE],
> -                     const float c0[TGSI_QUAD_SIZE],
> +                     float s,
> +                     float t,
> +                     float p,
> +                     unsigned level,
> +                     unsigned face_id,
>                       enum tgsi_sampler_control control,
> -                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                     float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width, height;
> -   int x0[4], y0[4], x1[4], y1[4];
> -   float xw[4], yw[4]; /* weights */
> +   int x0, y0, x1, y1;
> +   float xw, yw; /* weights */
>     union tex_tile_address addr;
> +   const float *tx0, *tx1, *tx2, *tx3;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> -   height = u_minify(texture->height0, level0);
> +   width = u_minify(texture->width0, level);
> +   height = u_minify(texture->height0, level);
>  
>     assert(width > 0);
>     assert(height > 0);
>  
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->linear_texcoord_s(s, width,  x0, x1, xw);
> -   samp->linear_texcoord_t(t, height, y0, y1, yw);
> +   samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
> +   samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]);
> -      const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]);
> -      const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]);
> -      const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]);
> -      int c;
> +   tx0 = get_texel_2d(samp, addr, x0, y0);
> +   tx1 = get_texel_2d(samp, addr, x1, y0);
> +   tx2 = get_texel_2d(samp, addr, x0, y1);
> +   tx3 = get_texel_2d(samp, addr, x1, y1);
>  
> -      /* interpolate R, G, B, A */
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = lerp_2d(xw[j], yw[j],
> -                              tx0[c], tx1[c],
> -                              tx2[c], tx3[c]);
> -      }
> -   }
> +   /* interpolate R, G, B, A */
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> +                                          tx0[c], tx1[c],
> +                                          tx2[c], tx3[c]);
>  }
>  
>  
>  static void
>  img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler,
> -                           const float s[TGSI_QUAD_SIZE],
> -                           const float t[TGSI_QUAD_SIZE],
> -                           const float p[TGSI_QUAD_SIZE],
> -                           const float c0[TGSI_QUAD_SIZE],
> +                           float s,
> +                           float t,
> +                           float p,
> +                           unsigned level,
> +                           unsigned face_id,
>                             enum tgsi_sampler_control control,
> -                           float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                           float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width, height;
> -   int x0[4], y0[4], x1[4], y1[4], layer[4];
> -   float xw[4], yw[4]; /* weights */
> +   int x0, y0, x1, y1, layer;
> +   float xw, yw; /* weights */
>     union tex_tile_address addr;
> +   const float *tx0, *tx1, *tx2, *tx3;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> -   height = u_minify(texture->height0, level0);
> +   width = u_minify(texture->width0, level);
> +   height = u_minify(texture->height0, level);
>  
>     assert(width > 0);
>     assert(height > 0);
>  
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->linear_texcoord_s(s, width,  x0, x1, xw);
> -   samp->linear_texcoord_t(t, height, y0, y1, yw);
> -   wrap_array_layer(p, texture->array_size, layer);
> +   samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
> +   samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
> +   wrap_array_layer(p, texture->array_size, &layer);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      const float *tx0 = get_texel_2d_array(samp, addr, x0[j],
> y0[j], layer[j]);
> -      const float *tx1 = get_texel_2d_array(samp, addr, x1[j],
> y0[j], layer[j]);
> -      const float *tx2 = get_texel_2d_array(samp, addr, x0[j],
> y1[j], layer[j]);
> -      const float *tx3 = get_texel_2d_array(samp, addr, x1[j],
> y1[j], layer[j]);
> -      int c;
> +   tx0 = get_texel_2d_array(samp, addr, x0, y0, layer);
> +   tx1 = get_texel_2d_array(samp, addr, x1, y0, layer);
> +   tx2 = get_texel_2d_array(samp, addr, x0, y1, layer);
> +   tx3 = get_texel_2d_array(samp, addr, x1, y1, layer);
>  
> -      /* interpolate R, G, B, A */
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = lerp_2d(xw[j], yw[j],
> -                              tx0[c], tx1[c],
> -                              tx2[c], tx3[c]);
> -      }
> -   }
> +   /* interpolate R, G, B, A */
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> +                                          tx0[c], tx1[c],
> +                                          tx2[c], tx3[c]);
>  }
>  
>  
>  static void
>  img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
> -                       const float s[TGSI_QUAD_SIZE],
> -                       const float t[TGSI_QUAD_SIZE],
> -                       const float p[TGSI_QUAD_SIZE],
> -                       const float c0[TGSI_QUAD_SIZE],
> +                       float s,
> +                       float t,
> +                       float p,
> +                       unsigned level,
> +                       unsigned face_id,
>                         enum tgsi_sampler_control control,
> -                       float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                       float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   const unsigned *faces = samp->faces; /* zero when not
> cube-mapping */
> -   unsigned level0, j;
>     int width, height;
> -   int x0[4], y0[4], x1[4], y1[4];
> -   float xw[4], yw[4]; /* weights */
> -   union tex_tile_address addr;
> +   int x0, y0, x1, y1;
> +   float xw, yw; /* weights */
> +   union tex_tile_address addr, addrj;
> +   const float *tx0, *tx1, *tx2, *tx3;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> -   height = u_minify(texture->height0, level0);
> +   width = u_minify(texture->width0, level);
> +   height = u_minify(texture->height0, level);
>  
>     assert(width > 0);
>     assert(height > 0);
>  
>     addr.value = 0;
> -   addr.bits.level = samp->level;
> +   addr.bits.level = level;
>  
> -   samp->linear_texcoord_s(s, width,  x0, x1, xw);
> -   samp->linear_texcoord_t(t, height, y0, y1, yw);
> +   samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
> +   samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      union tex_tile_address addrj = face(addr, faces[j]);
> -      const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]);
> -      const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]);
> -      const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]);
> -      const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]);
> -      int c;
> +   addrj = face(addr, face_id);
> +   tx0 = get_texel_2d(samp, addrj, x0, y0);
> +   tx1 = get_texel_2d(samp, addrj, x1, y0);
> +   tx2 = get_texel_2d(samp, addrj, x0, y1);
> +   tx3 = get_texel_2d(samp, addrj, x1, y1);
>  
> -      /* interpolate R, G, B, A */
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = lerp_2d(xw[j], yw[j],
> -                              tx0[c], tx1[c],
> -                              tx2[c], tx3[c]);
> -      }
> -   }
> +   /* interpolate R, G, B, A */
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> +                                          tx0[c], tx1[c],
> +                                          tx2[c], tx3[c]);
>  }
>  
>  
>  static void
>  img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
> -                     const float s[TGSI_QUAD_SIZE],
> -                     const float t[TGSI_QUAD_SIZE],
> -                     const float p[TGSI_QUAD_SIZE],
> -                     const float c0[TGSI_QUAD_SIZE],
> +                     float s,
> +                     float t,
> +                     float p,
> +                     unsigned level,
> +                     unsigned face_id,
>                       enum tgsi_sampler_control control,
> -                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +                     float *rgba)
>  {
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   unsigned level0, j;
>     int width, height, depth;
> -   int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
> -   float xw[4], yw[4], zw[4]; /* interpolation weights */
> +   int x0, x1, y0, y1, z0, z1;
> +   float xw, yw, zw; /* interpolation weights */
>     union tex_tile_address addr;
> +   const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
> +   int c;
>  
> -   level0 = samp->level;
> -   width = u_minify(texture->width0, level0);
> -   height = u_minify(texture->height0, level0);
> -   depth = u_minify(texture->depth0, level0);
> +   width = u_minify(texture->width0, level);
> +   height = u_minify(texture->height0, level);
> +   depth = u_minify(texture->depth0, level);
>  
>     addr.value = 0;
> -   addr.bits.level = level0;
> +   addr.bits.level = level;
>  
>     assert(width > 0);
>     assert(height > 0);
>     assert(depth > 0);
>  
> -   samp->linear_texcoord_s(s, width,  x0, x1, xw);
> -   samp->linear_texcoord_t(t, height, y0, y1, yw);
> -   samp->linear_texcoord_p(p, depth,  z0, z1, zw);
> +   samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
> +   samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
> +   samp->linear_texcoord_p(p, depth,  &z0, &z1, &zw);
>  
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      int c;
>  
> -      const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j],
> z0[j]);
> -      const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j],
> z0[j]);
> -      const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j],
> z0[j]);
> -      const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j],
> z0[j]);
> +   tx00 = get_texel_3d(samp, addr, x0, y0, z0);
> +   tx01 = get_texel_3d(samp, addr, x1, y0, z0);
> +   tx02 = get_texel_3d(samp, addr, x0, y1, z0);
> +   tx03 = get_texel_3d(samp, addr, x1, y1, z0);
>        
> -      const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j],
> z1[j]);
> -      const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j],
> z1[j]);
> -      const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j],
> z1[j]);
> -      const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j],
> z1[j]);
> +   tx10 = get_texel_3d(samp, addr, x0, y0, z1);
> +   tx11 = get_texel_3d(samp, addr, x1, y0, z1);
> +   tx12 = get_texel_3d(samp, addr, x0, y1, z1);
> +   tx13 = get_texel_3d(samp, addr, x1, y1, z1);
>        
>        /* interpolate R, G, B, A */
> -      for (c = 0; c < 4; c++) {
> -         rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
> -                              tx00[c], tx01[c],
> -                              tx02[c], tx03[c],
> -                              tx10[c], tx11[c],
> -                              tx12[c], tx13[c]);
> -      }
> -   }
> +   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +      rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
> +                                           tx00[c], tx01[c],
> +                                           tx02[c], tx03[c],
> +                                           tx10[c], tx11[c],
> +                                           tx12[c], tx13[c]);
>  }
>  
>  
> @@ -1575,12 +1458,11 @@ mip_filter_linear(struct tgsi_sampler
> *tgsi_sampler,
>  {
>     struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   int level0;
> -   float lambda;
> +   int j;
>     float lod[TGSI_QUAD_SIZE];
>  
>     if (control == tgsi_sampler_lod_bias) {
> -      lambda = samp->compute_lambda(samp, s, t, p) +
> samp->sampler->lod_bias;
> +      float lambda = samp->compute_lambda(samp, s, t, p) +
> samp->sampler->lod_bias;
>        compute_lod(samp->sampler, lambda, c0, lod);
>     } else {
>        assert(control == tgsi_sampler_lod_explicit);
> @@ -1588,40 +1470,31 @@ mip_filter_linear(struct tgsi_sampler
> *tgsi_sampler,
>        memcpy(lod, c0, sizeof(lod));
>     }
>  
> -   /* XXX: Take into account all lod values.
> -    */
> -   lambda = lod[0];
> -   level0 = samp->view->u.tex.first_level + (int)lambda;
> +   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> +      int level0 = samp->view->u.tex.first_level + (int)lod[j];
>  
> -   if (lambda < 0.0) {
> -      samp->level = samp->view->u.tex.first_level;
> -      samp->mag_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba);
> -   }
> -   else if (level0 >= texture->last_level) {
> -      samp->level = texture->last_level;
> -      samp->min_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba);
> -   }
> -   else {
> -      float levelBlend = frac(lambda);
> -      float rgba0[4][4];
> -      float rgba1[4][4];
> -      int c,j;
> +      if (lod[j] < 0.0)
> +         samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
>  
> -      samp->level = level0;
> -      samp->min_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba0);
> +      else if (level0 >= texture->last_level)
> +         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
>  
> -      samp->level = level0+1;
> -      samp->min_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba1);
> +      else {
> +         float levelBlend = frac(lod[j]);
> +         float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
> +         int c;
> +
> +         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0,   samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][0]);
> +         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0+1, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][1]);
>  
> -      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
>           for (c = 0; c < 4; c++) {
> -            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
> +            rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
>           }
>        }
>     }
>  
>     if (DEBUG_TEX) {
> -      print_sample(__FUNCTION__, rgba);
> +      print_sample_4(__FUNCTION__, rgba);
>     }
>  }
>  
> @@ -1642,11 +1515,11 @@ mip_filter_nearest(struct tgsi_sampler
> *tgsi_sampler,
>  {
>     struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   float lambda;
>     float lod[TGSI_QUAD_SIZE];
> +   int j;
>  
>     if (control == tgsi_sampler_lod_bias) {
> -      lambda = samp->compute_lambda(samp, s, t, p) +
> samp->sampler->lod_bias;
> +      float lambda = samp->compute_lambda(samp, s, t, p) +
> samp->sampler->lod_bias;
>        compute_lod(samp->sampler, lambda, c0, lod);
>     } else {
>        assert(control == tgsi_sampler_lod_explicit);
> @@ -1654,22 +1527,18 @@ mip_filter_nearest(struct tgsi_sampler
> *tgsi_sampler,
>        memcpy(lod, c0, sizeof(lod));
>     }
>  
> -   /* XXX: Take into account all lod values.
> -    */
> -   lambda = lod[0];
> -
> -   if (lambda < 0.0) {
> -      samp->level = samp->view->u.tex.first_level;
> -      samp->mag_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba);
> -   }
> -   else {
> -      samp->level = samp->view->u.tex.first_level + (int)(lambda + 0.5F) ;
> -      samp->level = MIN2(samp->level, (int)texture->last_level);
> -      samp->min_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba);
> +   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> +      if (lod[j] < 0.0)
> +         samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
> +      else {
> +         float level = samp->view->u.tex.first_level + (int)(lod[j] + 0.5F) ;
> +         level = MIN2(level, (int)texture->last_level);
> +         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
> +      }
>     }
>  
>     if (DEBUG_TEX) {
> -      print_sample(__FUNCTION__, rgba);
> +      print_sample_4(__FUNCTION__, rgba);
>     }
>  }
>  
> @@ -1684,11 +1553,11 @@ mip_filter_none(struct tgsi_sampler
> *tgsi_sampler,
>                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
>  {
>     struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
> -   float lambda;
>     float lod[TGSI_QUAD_SIZE];
> +   int j;
>  
>     if (control == tgsi_sampler_lod_bias) {
> -      lambda = samp->compute_lambda(samp, s, t, p) +
> samp->sampler->lod_bias;
> +      float lambda = samp->compute_lambda(samp, s, t, p) +
> samp->sampler->lod_bias;
>        compute_lod(samp->sampler, lambda, c0, lod);
>     } else {
>        assert(control == tgsi_sampler_lod_explicit);
> @@ -1696,20 +1565,34 @@ mip_filter_none(struct tgsi_sampler
> *tgsi_sampler,
>        memcpy(lod, c0, sizeof(lod));
>     }
>  
> -   /* XXX: Take into account all lod values.
> -    */
> -   lambda = lod[0];
> -
> -   samp->level = samp->view->u.tex.first_level;
> -   if (lambda < 0.0) {
> -      samp->mag_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba);
> -   }
> -   else {
> -      samp->min_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba);
> +   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> +      if (lod[j] < 0.0) {
> +         samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j],
> samp->view->u.tex.first_level, samp->faces[j],
> tgsi_sampler_lod_bias, &rgba[0][j]);
> +      }
> +      else {
> +         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j],
> samp->view->u.tex.first_level, samp->faces[j],
> tgsi_sampler_lod_bias, &rgba[0][j]);
> +      }
>     }
>  }
>  
>  
> +static void
> +mip_filter_none_no_filter_select(struct tgsi_sampler *tgsi_sampler,
> +                                     const float s[TGSI_QUAD_SIZE],
> +                                     const float t[TGSI_QUAD_SIZE],
> +                                     const float p[TGSI_QUAD_SIZE],
> +                                     const float c0[TGSI_QUAD_SIZE],
> +                                     enum tgsi_sampler_control
> control,
> +                                     float
> rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +{
> +   struct sp_sampler_variant *samp =
> sp_sampler_variant(tgsi_sampler);
> +   int j;
> +
> +   for (j = 0; j < TGSI_QUAD_SIZE; j++)
> +      samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j],
> samp->view->u.tex.first_level, samp->faces[j],
> tgsi_sampler_lod_bias, &rgba[0][j]);
> +}
> +
> +
>  /* For anisotropic filtering */
>  #define WEIGHT_LUT_SIZE 1024
>  
> @@ -1738,7 +1621,7 @@ create_filter_table(void)
>  /**
>   * Elliptical weighted average (EWA) filter for producing high quality
>   * anisotropic filtered results.
> - * Based on the Higher Quality Elliptical Weighted Avarage Filter
> + * Based on the Higher Quality Elliptical Weighted Average Filter
>   * published by Paul S. Heckbert in his Master's Thesis
>   * "Fundamentals of Texture Mapping and Image Warping" (1989)
>   */
> @@ -1747,7 +1630,7 @@ img_filter_2d_ewa(struct tgsi_sampler
> *tgsi_sampler,
>                    const float s[TGSI_QUAD_SIZE],
>                    const float t[TGSI_QUAD_SIZE],
>                    const float p[TGSI_QUAD_SIZE],
> -                  const float c0[TGSI_QUAD_SIZE],
> +                  unsigned level,
>                    enum tgsi_sampler_control control,
>                    const float dudx, const float dvdx,
>                    const float dudy, const float dvdy,
> @@ -1756,7 +1639,8 @@ img_filter_2d_ewa(struct tgsi_sampler
> *tgsi_sampler,
>     const struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
>  
> -   unsigned level0 = samp->level > 0 ? samp->level : 0;
> +   // ??? Won't the image filters blow up if level is negative?
> +   unsigned level0 = level > 0 ? level : 0;
>     float scaling = 1.0 / (1 << level0);
>     int width = u_minify(texture->width0, level0);
>     int height = u_minify(texture->height0, level0);
> @@ -1857,9 +1741,9 @@ img_filter_2d_ewa(struct tgsi_sampler
> *tgsi_sampler,
>                     * img_filter_2d_nearest or one of the
>                     * accelerated img_filter_2d_nearest_XXX functions.
>                     */
> -                  samp->min_img_filter(tgsi_sampler, s_buffer,
> t_buffer, p, NULL,
> -                                        tgsi_sampler_lod_bias,
> rgba_temp);
>                    for (jj = 0; jj < buffer_next; jj++) {
> +                     samp->min_img_filter(tgsi_sampler,
> s_buffer[jj], t_buffer[jj], p[jj], level, samp->faces[j],
> +                                          tgsi_sampler_lod_bias,
> &rgba_temp[0][jj]);
>                       num[0] += weight_buffer[jj] * rgba_temp[0][jj];
>                       num[1] += weight_buffer[jj] * rgba_temp[1][jj];
>                       num[2] += weight_buffer[jj] * rgba_temp[2][jj];
> @@ -1877,10 +1761,7 @@ img_filter_2d_ewa(struct tgsi_sampler
> *tgsi_sampler,
>        }
>  
>        /* if the tex coord buffer contains unread values, we will read
> -       * them now.  Note that in most cases we have to read more
> pixel
> -       * values than required, however, as the img_filter_2d_nearest
> -       * function(s) does not have a count parameter, we need to
> read
> -       * the whole quad and ignore the unused values
> +       * them now.
>         */
>        if (buffer_next > 0) {
>           unsigned jj;
> @@ -1888,9 +1769,9 @@ img_filter_2d_ewa(struct tgsi_sampler
> *tgsi_sampler,
>            * img_filter_2d_nearest or one of the
>            * accelerated img_filter_2d_nearest_XXX functions.
>            */
> -         samp->min_img_filter(tgsi_sampler, s_buffer, t_buffer, p,
> NULL,
> -                               tgsi_sampler_lod_bias, rgba_temp);
>           for (jj = 0; jj < buffer_next; jj++) {
> +            samp->min_img_filter(tgsi_sampler, s_buffer[jj],
> t_buffer[jj], p[jj], level, samp->faces[j],
> +                                 tgsi_sampler_lod_bias,
> &rgba_temp[0][jj]);
>              num[0] += weight_buffer[jj] * rgba_temp[0][jj];
>              num[1] += weight_buffer[jj] * rgba_temp[1][jj];
>              num[2] += weight_buffer[jj] * rgba_temp[2][jj];
> @@ -1909,8 +1790,8 @@ img_filter_2d_ewa(struct tgsi_sampler
> *tgsi_sampler,
>           rgba[2]=0;
>           rgba[3]=0;*/
>           /* not enough pixels in resampling, resort to direct interpolation */
> -         samp->min_img_filter(tgsi_sampler, s, t, p, NULL,
> -                              tgsi_sampler_lod_bias, rgba_temp);
> +         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level,
> samp->faces[j],
> +                              tgsi_sampler_lod_bias,
> &rgba_temp[0][j]);
>           den = 1;
>           num[0] = rgba_temp[0][j];
>           num[1] = rgba_temp[1][j];
> @@ -2005,20 +1886,20 @@ mip_filter_linear_aniso(struct tgsi_sampler
> *tgsi_sampler,
>      * simply return the average of the whole image.
>      */
>     if (level0 >= (int) texture->last_level) {
> -      samp->level = texture->last_level;
> -      samp->min_img_filter(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba);
> +      int j;
> +      for (j = 0; j < TGSI_QUAD_SIZE; j++)
> +         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j],
> texture->last_level, samp->faces[j], tgsi_sampler_lod_bias,
> &rgba[0][j]);
>     }
>     else {
>        /* don't bother interpolating between multiple LODs; it doesn't
>         * seem to be worth the extra running time.
>         */
> -      samp->level = level0;
> -      img_filter_2d_ewa(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias,
> +      img_filter_2d_ewa(tgsi_sampler, s, t, p, level0,
> tgsi_sampler_lod_bias,
>                          dudx, dvdx, dudy, dvdy, rgba);
>     }
>  
>     if (DEBUG_TEX) {
> -      print_sample(__FUNCTION__, rgba);
> +      print_sample_4(__FUNCTION__, rgba);
>     }
>  }
>  
> @@ -2039,7 +1920,7 @@ mip_filter_linear_2d_linear_repeat_POT(
>  {
>     struct sp_sampler_variant *samp =
>     sp_sampler_variant(tgsi_sampler);
>     const struct pipe_resource *texture = samp->view->texture;
> -   int level0;
> +   int j;
>     float lambda;
>     float lod[TGSI_QUAD_SIZE];
>  
> @@ -2052,42 +1933,33 @@ mip_filter_linear_2d_linear_repeat_POT(
>        memcpy(lod, c0, sizeof(lod));
>     }
>  
> -   /* XXX: Take into account all lod values.
> -    */
> -   lambda = lod[0];
> -   level0 = samp->view->u.tex.first_level + (int)lambda;
> -
> -   /* Catches both negative and large values of level0:
> -    */
> -   if ((unsigned)level0 >= texture->last_level) {
> -      if (level0 < 0)
> -         samp->level = samp->view->u.tex.first_level;
> -      else
> -         samp->level = texture->last_level;
> +   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> +      int level0 = samp->view->u.tex.first_level + (int)lod[j];
>  
> -      img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba);
> -   }
> -   else {
> -      float levelBlend = frac(lambda);
> -      float rgba0[4][4];
> -      float rgba1[4][4];
> -      int c,j;
> +      /* Catches both negative and large values of level0:
> +       */
> +      if ((unsigned)level0 >= texture->last_level) {
> +         if (level0 < 0)
> +            img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j],
> t[j], p[j], samp->view->u.tex.first_level, samp->faces[j],
> tgsi_sampler_lod_bias, &rgba[0][j]);
> +         else
> +            img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j],
> t[j], p[j], samp->view->texture->last_level, samp->faces[j],
> tgsi_sampler_lod_bias, &rgba[0][j]);
>  
> -      samp->level = level0;
> -      img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba0);
> +      }
> +      else {
> +         float levelBlend = frac(lod[j]);
> +         float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
> +         int c;
>  
> -      samp->level = level0+1;
> -      img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL,
> tgsi_sampler_lod_bias, rgba1);
> +         img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j],
> p[j], level0,   samp->faces[j], tgsi_sampler_lod_bias,
> &rgbax[0][j]);
> +         img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j],
> p[j], level0+1, samp->faces[j], tgsi_sampler_lod_bias,
> &rgbax[0][j]);
>  
> -      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -         for (c = 0; c < 4; c++) {
> -            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
> -         }
> +         for (c = 0; c < TGSI_NUM_CHANNELS; c++)
> +            rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
>        }
>     }
>  
>     if (DEBUG_TEX) {
> -      print_sample(__FUNCTION__, rgba);
> +      print_sample_4(__FUNCTION__, rgba);
>     }
>  }
>  
> @@ -2505,7 +2377,7 @@ get_lambda_func(const union sp_sampler_key key)
>  }
>  
>  
> -static filter_func
> +static img_filter_func
>  get_img_filter(const union sp_sampler_key key,
>                 unsigned filter,
>                 const struct pipe_sampler_state *sampler)
> @@ -2597,7 +2469,6 @@ sp_sampler_variant_bind_view( struct
> sp_sampler_variant *samp,
>     samp->cache = tex_cache;
>     samp->xpot = util_logbase2( texture->width0 );
>     samp->ypot = util_logbase2( texture->height0 );
> -   samp->level = view->u.tex.first_level;
>  }
>  
>  
> @@ -2793,7 +2664,7 @@ sp_create_sampler_variant( const struct
> pipe_sampler_state *sampler,
>     switch (sampler->min_mip_filter) {
>     case PIPE_TEX_MIPFILTER_NONE:
>        if (sampler->min_img_filter == sampler->mag_img_filter)
> -         samp->mip_filter = samp->min_img_filter;
> +         samp->mip_filter = mip_filter_none_no_filter_select;
>        else
>           samp->mip_filter = mip_filter_none;
>        break;
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h
> b/src/gallium/drivers/softpipe/sp_tex_sample.h
> index 7fdabd4..dd847af 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.h
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
> @@ -34,21 +34,30 @@
>  
>  struct sp_sampler_variant;
>  
> -typedef void (*wrap_nearest_func)(const float s[4],
> +typedef void (*wrap_nearest_func)(float s,
>                                    unsigned size,
> -                                  int icoord[4]);
> +                                  int *icoord);
>  
> -typedef void (*wrap_linear_func)(const float s[4],
> +typedef void (*wrap_linear_func)(float s,
>                                   unsigned size,
> -                                 int icoord0[4],
> -                                 int icoord1[4],
> -                                 float w[4]);
> +                                 int *icoord0,
> +                                 int *icoord1,
> +                                 float *w);
>  
>  typedef float (*compute_lambda_func)(const struct sp_sampler_variant
>  *sampler,
>                                       const float s[TGSI_QUAD_SIZE],
>                                       const float t[TGSI_QUAD_SIZE],
>                                       const float p[TGSI_QUAD_SIZE]);
>  
> +typedef void (*img_filter_func)(struct tgsi_sampler *tgsi_sampler,
> +                                float s,
> +                                float t,
> +                                float p,
> +                                unsigned level,
> +                                unsigned face_id,
> +                                enum tgsi_sampler_control control,
> +                                float *rgba);
> +
>  typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler,
>                              const float s[TGSI_QUAD_SIZE],
>                              const float t[TGSI_QUAD_SIZE],
> @@ -98,9 +107,8 @@ struct sp_sampler_variant
>      */
>     unsigned xpot;
>     unsigned ypot;
> -   unsigned level;
>  
> -   unsigned faces[4];
> +   unsigned faces[TGSI_QUAD_SIZE];
>     
>     wrap_nearest_func nearest_texcoord_s;
>     wrap_nearest_func nearest_texcoord_t;
> @@ -110,8 +118,8 @@ struct sp_sampler_variant
>     wrap_linear_func linear_texcoord_t;
>     wrap_linear_func linear_texcoord_p;
>  
> -   filter_func min_img_filter;
> -   filter_func mag_img_filter;
> +   img_filter_func min_img_filter;
> +   img_filter_func mag_img_filter;
>  
>     compute_lambda_func compute_lambda;
>  
> --
> 1.7.10.280.gaa39
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


More information about the mesa-dev mailing list