[Mesa-dev] [PATCH 1/2] softpipe: clean up lod computation

Fri Feb 8 07:22:28 PST 2013

On 02/06/2013 10:59 AM, sroland at vmware.com wrote:
> From: Roland Scheidegger<sroland at vmware.com>
>
> This should handle the new lod_zero modifier more correctly.
> The runtime-conditional is a bit more complex however we now also do
> scalar lod computation when appropriate which should more than make up for it.
> The refactoring should also fix an issue with explicit lods
> (lod clamp wasn't applied to them).
> Also, always pass lod as the 5th element from tgsi executor, which simplifies
> things (get rid of annoying conditionals later).
> ---
>   src/gallium/auxiliary/tgsi/tgsi_exec.c       |   64 ++++++------
>   src/gallium/auxiliary/tgsi/tgsi_exec.h       |    5 +-
>   src/gallium/drivers/softpipe/sp_tex_sample.c |  137 ++++++++++++++------------
>   3 files changed, 109 insertions(+), 97 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 6d04cdc..2f5db82 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -1718,6 +1718,7 @@ fetch_texel( struct tgsi_sampler *sampler,
>      uint j;
>      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
>
> +   /* FIXME: handle explicit derivs, offsets */
>      sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, c1->f, control, rgba);
>
>      for (j = 0; j<  4; j++) {
> @@ -1750,9 +1751,11 @@ exec_tex(struct tgsi_exec_machine *mach,
>      const uint unit = inst->Src[sampler].Register.Index;
>      union tgsi_exec_channel r[4], cubearraycomp, cubelod;
>      const union tgsi_exec_channel *lod =&ZeroVec;
> -   enum tgsi_sampler_control control;
> +   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
>      uint chan;
>
> +   assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
> +
>      if (modifier != TEX_MODIFIER_NONE&&  (sampler == 1)) {
>         FETCH(&r[3], 0, TGSI_CHAN_W);
>         if (modifier != TEX_MODIFIER_PROJECTED) {
> @@ -1762,7 +1765,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>
>      if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
>         control = tgsi_sampler_lod_explicit;
> -   } else {
> +   } else if (modifier == TEX_MODIFIER_LOD_BIAS){
>         control = tgsi_sampler_lod_bias;
>      }
>
> @@ -1775,7 +1778,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>         }
>
>         fetch_texel(mach->Samplers[unit],
> -&r[0],&ZeroVec,&ZeroVec, lod,&ZeroVec, /* S, T, P, LOD */
> +&r[0],&ZeroVec,&ZeroVec,&ZeroVec, lod, /* S, T, P, LOD */

I think the /* S, T, P, LOD */ comments should be updated while you're 
at it.  There's 5 arguments on that line, but the comment only has 4 
names.  That's been wrong for a while.

>                     control,
>                     &r[0],&r[1],&r[2],&r[3]);     /* R, G, B, A */
>         break;
> @@ -1788,7 +1791,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>         }
>
>         fetch_texel(mach->Samplers[unit],
> -&r[0],&ZeroVec,&r[2], lod,&ZeroVec, /* S, T, P, LOD */
> +&r[0],&ZeroVec,&r[2],&ZeroVec, lod, /* S, T, P, LOD */
>                     control,
>                     &r[0],&r[1],&r[2],&r[3]);     /* R, G, B, A */
>         break;
> @@ -1808,7 +1811,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>         }
>
>         fetch_texel(mach->Samplers[unit],
> -&r[0],&r[1],&r[2], lod,&ZeroVec,    /* S, T, P, LOD */
> +&r[0],&r[1],&r[2],&ZeroVec, lod,    /* S, T, P, LOD */
>                     control,
>                     &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>         break;
> @@ -1822,7 +1825,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>         }
>
>         fetch_texel(mach->Samplers[unit],
> -&r[0],&r[1],&ZeroVec, lod,&ZeroVec,    /* S, T, P, LOD */
> +&r[0],&r[1],&ZeroVec,&ZeroVec, lod,   /* S, T, P, LOD */
>                     control,
>                     &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>         break;
> @@ -1836,7 +1839,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>         }
>
>         fetch_texel(mach->Samplers[unit],
> -&r[0],&r[1],&r[2], lod,&ZeroVec,    /* S, T, P, LOD */
> +&r[0],&r[1],&r[2],&ZeroVec, lod,   /* S, T, P, LOD */
>                     control,
>                     &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>         break;
> @@ -1852,7 +1855,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>         }
>
>         fetch_texel(mach->Samplers[unit],
> -&r[0],&r[1],&r[2], lod,&ZeroVec,    /* S, T, P, LOD */
> +&r[0],&r[1],&r[2],&ZeroVec, lod,   /* S, T, P, LOD */
>                     control,
>                     &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>         break;
> @@ -1898,7 +1901,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>         }
>
>         fetch_texel(mach->Samplers[unit],
> -&r[0],&r[1],&r[2], lod,&ZeroVec,
> +&r[0],&r[1],&r[2],&ZeroVec, lod,
>                     control,
>                     &r[0],&r[1],&r[2],&r[3]);
>         break;
> @@ -2127,31 +2130,28 @@ exec_sample(struct tgsi_exec_machine *mach,
>   {
>      const uint resource_unit = inst->Src[1].Register.Index;
>      const uint sampler_unit = inst->Src[2].Register.Index;
> -   union tgsi_exec_channel r[4];
> +   union tgsi_exec_channel r[4], c1;
>      const union tgsi_exec_channel *lod =&ZeroVec;
> -   enum tgsi_sampler_control control;
> +   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
>      uint chan;
>
>      assert(modifier != TEX_MODIFIER_PROJECTED);
>
>      if (modifier != TEX_MODIFIER_NONE) {
>         if (modifier == TEX_MODIFIER_LOD_BIAS) {
> -         FETCH(&r[3], 3, TGSI_CHAN_X);
> -         lod =&r[3];
> +         FETCH(&c1, 3, TGSI_CHAN_X);
> +         lod =&c1;
> +         control = tgsi_sampler_lod_bias;
>         }
>         else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
> -         FETCH(&r[3], 0, TGSI_CHAN_W);
> -         lod =&r[3];
> +         FETCH(&c1, 0, TGSI_CHAN_W);
> +         lod =&c1;
> +         control = tgsi_sampler_lod_explicit;
>         }
> -      else
> +      else {
>            assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
> -   }
> -
> -   if (modifier == TEX_MODIFIER_EXPLICIT_LOD ||
> -       modifier == TEX_MODIFIER_LEVEL_ZERO) {
> -      control = tgsi_sampler_lod_explicit;
> -   } else {
> -      control = tgsi_sampler_lod_bias;
> +         control = tgsi_sampler_lod_zero;
> +      }
>      }
>
>      FETCH(&r[0], 0, TGSI_CHAN_X);
> @@ -2161,13 +2161,13 @@ exec_sample(struct tgsi_exec_machine *mach,
>         if (compare) {
>            FETCH(&r[2], 3, TGSI_CHAN_X);
>            fetch_texel(mach->Samplers[sampler_unit],
> -&r[0],&ZeroVec,&r[2], lod,&ZeroVec,  /* S, T, P, LOD */
> +&r[0],&ZeroVec,&r[2],&ZeroVec, lod, /* S, T, P, LOD */
>                        control,
>                        &r[0],&r[1],&r[2],&r[3]);     /* R, G, B, A */
>         }
>         else {
>            fetch_texel(mach->Samplers[sampler_unit],
> -&r[0],&ZeroVec,&ZeroVec, lod,&ZeroVec,  /* S, T, P, LOD */
> +&r[0],&ZeroVec,&ZeroVec,&ZeroVec, lod, /* S, T, P, LOD */
>                        control,
>                        &r[0],&r[1],&r[2],&r[3]);     /* R, G, B, A */
>         }
> @@ -2180,13 +2180,13 @@ exec_sample(struct tgsi_exec_machine *mach,
>         if (compare) {
>            FETCH(&r[2], 3, TGSI_CHAN_X);
>            fetch_texel(mach->Samplers[sampler_unit],
> -&r[0],&r[1],&r[2], lod,&ZeroVec,     /* S, T, P, LOD */
> +&r[0],&r[1],&r[2],&ZeroVec, lod,    /* S, T, P, LOD */
>                        control,
>                        &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>         }
>         else {
>            fetch_texel(mach->Samplers[sampler_unit],
> -&r[0],&r[1],&ZeroVec, lod,&ZeroVec,     /* S, T, P, LOD */
> +&r[0],&r[1],&ZeroVec,&ZeroVec, lod,    /* S, T, P, LOD */
>                        control,
>                        &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>         }
> @@ -2200,13 +2200,13 @@ exec_sample(struct tgsi_exec_machine *mach,
>         if(compare) {
>            FETCH(&r[3], 3, TGSI_CHAN_X);
>            fetch_texel(mach->Samplers[sampler_unit],
> -&r[0],&r[1],&r[2],&r[3],&ZeroVec,
> +&r[0],&r[1],&r[2],&r[3], lod,
>                        control,
>                        &r[0],&r[1],&r[2],&r[3]);
>         }
>         else {
>            fetch_texel(mach->Samplers[sampler_unit],
> -&r[0],&r[1],&r[2], lod,&ZeroVec,
> +&r[0],&r[1],&r[2],&ZeroVec, lod,
>                        control,
>                        &r[0],&r[1],&r[2],&r[3]);
>         }
> @@ -2217,12 +2217,6 @@ exec_sample(struct tgsi_exec_machine *mach,
>         FETCH(&r[2], 0, TGSI_CHAN_Z);
>         FETCH(&r[3], 0, TGSI_CHAN_W);
>         if(compare) {
> -         assert(modifier == TEX_MODIFIER_NONE);
> -         /*
> -          * FIXME: lod bias and explicit lod are prohibited but
> -          * for sample_c_lz we pass the level zero info as explicit
> -          * lod 0.
> -          */
>            FETCH(&r[4], 3, TGSI_CHAN_X);
>            fetch_texel(mach->Samplers[sampler_unit],
>                        &r[0],&r[1],&r[2],&r[3],&r[4],
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> index fbd28a2..911432a 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> @@ -89,8 +89,11 @@ struct tgsi_interp_coef
>   };
>
>   enum tgsi_sampler_control {
> +   tgsi_sampler_lod_none,
>      tgsi_sampler_lod_bias,
> -   tgsi_sampler_lod_explicit
> +   tgsi_sampler_lod_explicit,
> +   tgsi_sampler_lod_zero
> +   /* FIXME: tgsi_sampler_derivs_explicit */
>   };
>
>   /**
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index e9575da..60c1f92 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -1647,15 +1647,77 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
>    */
>   static INLINE void
>   compute_lod(const struct pipe_sampler_state *sampler,
> +            enum tgsi_sampler_control control,
>               const float biased_lambda,
> -            const float lodbias[TGSI_QUAD_SIZE],
> +            const float lod_in[TGSI_QUAD_SIZE],
>               float lod[TGSI_QUAD_SIZE])

Could you update the comments on that function to explain all the 
different lod parameters?  It's kind of confusing.

>   {
> +   float min_lod = sampler->min_lod;
> +   float max_lod = sampler->max_lod;
>      uint i;
>
> -   for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
> -      lod[i] = biased_lambda + lodbias[i];
> -      lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
> +   if (control == tgsi_sampler_lod_none ||
> +       control == tgsi_sampler_lod_zero) {
> +      float lod_scalar = CLAMP(biased_lambda, min_lod, max_lod);
> +      lod[0] = lod[1] = lod[2] = lod[3] = lod_scalar;
> +   }
> +   else if (control == tgsi_sampler_lod_bias) {
> +      for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
> +         lod[i] = biased_lambda + lod_in[i];
> +         lod[i] = CLAMP(lod[i], min_lod, max_lod);
> +      }
> +   }
> +   else {
> +      assert(control == tgsi_sampler_lod_explicit);
> +      for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
> +         lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
> +      }
> +   }

Not a big deal, but I'd probably use a switch instead of if/else.

> +}
> +
> +
> +/* Calculate level of detail for every fragment.

What are the c0, c1 parameters?

> + */
> +static INLINE void
> +compute_lambda_lod(struct sp_sampler_variant *samp,
> +                   const float s[TGSI_QUAD_SIZE],
> +                   const float t[TGSI_QUAD_SIZE],
> +                   const float p[TGSI_QUAD_SIZE],
> +                   const float c0[TGSI_QUAD_SIZE],
> +                   const float c1[TGSI_QUAD_SIZE],
> +                   enum tgsi_sampler_control control,
> +                   float lod[TGSI_QUAD_SIZE])
> +{
> +   const struct pipe_sampler_state *sampler = samp->sampler;
> +   float lod_bias = sampler->lod_bias;
> +   float min_lod = sampler->min_lod;
> +   float max_lod = sampler->max_lod;
> +   uint i;
> +
> +   if (control == tgsi_sampler_lod_none) {
> +      float lambda = samp->compute_lambda(samp, s, t, p) + lod_bias;
> +      float lod_scalar = CLAMP(lambda, min_lod, max_lod);
> +      lod[0] = lod[1] = lod[2] = lod[3] = lod_scalar;
> +   }
> +   else if (control == tgsi_sampler_lod_bias) {
> +      float lambda = samp->compute_lambda(samp, s, t, p) + lod_bias;
> +
> +      for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
> +         lod[i] = lambda + c1[i];
> +         lod[i] = CLAMP(lod[i], min_lod, max_lod);
> +      }
> +   }
> +   else if (control == tgsi_sampler_lod_zero) {
> +      /* this is all static state in the sampler really need clamp here? */
> +      float lod_scalar = CLAMP(lod_bias, min_lod, max_lod);
> +      lod[0] = lod[1] = lod[2] = lod[3] = lod_scalar;
> +   }
> +   else {
> +      assert(control == tgsi_sampler_lod_explicit);
> +
> +      for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
> +         lod[i] = CLAMP(c1[i], min_lod, max_lod);
> +      }
>      }

Another switch?

>   }
>
> @@ -1675,21 +1737,7 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
>      int j;
>      float lod[TGSI_QUAD_SIZE];
>
> -   if (control == tgsi_sampler_lod_bias) {
> -      float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
> -         compute_lod(samp->sampler, lambda, c1, lod);
> -      else
> -         compute_lod(samp->sampler, lambda, c0, lod);
> -   } else {
> -      assert(control == tgsi_sampler_lod_explicit);
> -
> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
> -         memcpy(lod, c1, sizeof(lod));
> -      else
> -         memcpy(lod, c0, sizeof(lod));
> -
> -   }
> +   compute_lambda_lod(samp, s, t, p, c0, c1, control, lod);
>
>      for (j = 0; j<  TGSI_QUAD_SIZE; j++) {
>         int level0 = samp->view->u.tex.first_level + (int)lod[j];
> @@ -1744,20 +1792,7 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
>      float lod[TGSI_QUAD_SIZE];
>      int j;
>
> -   if (control == tgsi_sampler_lod_bias) {
> -      float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
> -         compute_lod(samp->sampler, lambda, c1, lod);
> -      else
> -         compute_lod(samp->sampler, lambda, c0, lod);
> -   } else {
> -      assert(control == tgsi_sampler_lod_explicit);
> -
> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
> -         memcpy(lod, c1, sizeof(lod));
> -      else
> -         memcpy(lod, c0, sizeof(lod));
> -   }
> +   compute_lambda_lod(samp, s, t, p, c0, c1, control, lod);
>
>      for (j = 0; j<  TGSI_QUAD_SIZE; j++) {
>         if (lod[j]<  0.0)
> @@ -1791,20 +1826,7 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
>      float lod[TGSI_QUAD_SIZE];
>      int j;
>
> -   if (control == tgsi_sampler_lod_bias) {
> -      float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
> -         compute_lod(samp->sampler, lambda, c1, lod);
> -      else
> -         compute_lod(samp->sampler, lambda, c0, lod);
> -   } else {
> -      assert(control == tgsi_sampler_lod_explicit);
> -
> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
> -         memcpy(lod, c1, sizeof(lod));
> -      else
> -         memcpy(lod, c0, sizeof(lod));
> -   }
> +   compute_lambda_lod(samp, s, t, p, c0, c1, control, lod);
>
>      for (j = 0; j<  TGSI_QUAD_SIZE; j++) {
>         if (lod[j]<  0.0) {
> @@ -2077,7 +2099,8 @@ mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler,
>      float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
>      float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
>
> -   if (control == tgsi_sampler_lod_bias) {
> +   if (control == tgsi_sampler_lod_bias ||
> +       control == tgsi_sampler_lod_none) {
>         /* note: instead of working with Px and Py, we will use the
>          * squared length instead, to avoid sqrt.
>          */
> @@ -2114,12 +2137,12 @@ mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler,
>          * this since 0.5*log(x) = log(sqrt(x))
>          */
>         lambda = 0.5F * util_fast_log2(Pmin2) + samp->sampler->lod_bias;
> -      compute_lod(samp->sampler, lambda, c0, lod);
> +      compute_lod(samp->sampler, control, lambda, c1, lod);
>      }
>      else {
> -      assert(control == tgsi_sampler_lod_explicit);
> -
> -      memcpy(lod, c0, sizeof(lod));
> +      assert(control == tgsi_sampler_lod_explicit ||
> +             control == tgsi_sampler_lod_zero);
> +      compute_lod(samp->sampler, control, samp->sampler->lod_bias, c1, lod);
>      }
>
>      /* XXX: Take into account all lod values.
> @@ -2168,17 +2191,9 @@ mip_filter_linear_2d_linear_repeat_POT(
>      struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
>      const struct pipe_resource *texture = samp->view->texture;
>      int j;
> -   float lambda;
>      float lod[TGSI_QUAD_SIZE];
>
> -   if (control == tgsi_sampler_lod_bias) {
> -      lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
> -      compute_lod(samp->sampler, lambda, c0, lod);
> -   } else {
> -      assert(control == tgsi_sampler_lod_explicit);
> -
> -      memcpy(lod, c0, sizeof(lod));
> -   }
> +   compute_lambda_lod(samp, s, t, p, c0, c1, control, lod);
>
>      for (j = 0; j<  TGSI_QUAD_SIZE; j++) {
>         int level0 = samp->view->u.tex.first_level + (int)lod[j];

Looks good otherwise!

Reviewed-by: Brian Paul <brianp at vmware.com>