[Mesa-dev] [PATCH 1/2] softpipe: clean up lod computation

Fri Feb 8 10:12:37 PST 2013

Am 08.02.2013 16:22, schrieb Brian Paul:
> On 02/06/2013 10:59 AM, sroland at vmware.com wrote:
>> From: Roland Scheidegger<sroland at vmware.com>
>>
>> This should handle the new lod_zero modifier more correctly.
>> The runtime-conditional is a bit more complex however we now also do
>> scalar lod computation when appropriate which should more than make up
>> for it.
>> The refactoring should also fix an issue with explicit lods
>> (lod clamp wasn't applied to them).
>> Also, always pass lod as the 5th element from tgsi executor, which
>> simplifies
>> things (get rid of annoying conditionals later).
>> ---
>>   src/gallium/auxiliary/tgsi/tgsi_exec.c       |   64 ++++++------
>>   src/gallium/auxiliary/tgsi/tgsi_exec.h       |    5 +-
>>   src/gallium/drivers/softpipe/sp_tex_sample.c |  137
>> ++++++++++++++------------
>>   3 files changed, 109 insertions(+), 97 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> index 6d04cdc..2f5db82 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> @@ -1718,6 +1718,7 @@ fetch_texel( struct tgsi_sampler *sampler,
>>      uint j;
>>      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
>>
>> +   /* FIXME: handle explicit derivs, offsets */
>>      sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, c1->f,
>> control, rgba);
>>
>>      for (j = 0; j<  4; j++) {
>> @@ -1750,9 +1751,11 @@ exec_tex(struct tgsi_exec_machine *mach,
>>      const uint unit = inst->Src[sampler].Register.Index;
>>      union tgsi_exec_channel r[4], cubearraycomp, cubelod;
>>      const union tgsi_exec_channel *lod =&ZeroVec;
>> -   enum tgsi_sampler_control control;
>> +   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
>>      uint chan;
>>
>> +   assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
>> +
>>      if (modifier != TEX_MODIFIER_NONE&&  (sampler == 1)) {
>>         FETCH(&r[3], 0, TGSI_CHAN_W);
>>         if (modifier != TEX_MODIFIER_PROJECTED) {
>> @@ -1762,7 +1765,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>>
>>      if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
>>         control = tgsi_sampler_lod_explicit;
>> -   } else {
>> +   } else if (modifier == TEX_MODIFIER_LOD_BIAS){
>>         control = tgsi_sampler_lod_bias;
>>      }
>>
>> @@ -1775,7 +1778,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>>         }
>>
>>         fetch_texel(mach->Samplers[unit],
>> -&r[0],&ZeroVec,&ZeroVec, lod,&ZeroVec, /* S, T, P, LOD */
>> +&r[0],&ZeroVec,&ZeroVec,&ZeroVec, lod, /* S, T, P, LOD */
> 
> I think the /* S, T, P, LOD */ comments should be updated while you're
> at it.  There's 5 arguments on that line, but the comment only has 4
> names.  That's been wrong for a while.
That is true I noticed this as well. But I couldn't think of what the
names should be so I just left the comment broken. Fact is "p" is really
not p, sometimes it's "r" sometimes it's "c", same for the forth
parameter which can be either 4th coord for cube arrays or c, or the 5th
parameter which is now usually lod (but c for shadow cube array). I
guess could update the comments to what the param means where it's used
but the actual parameter names are just similarly bogus (due to their
overloaded nature).


> 
> 
>>                     control,
>>                     &r[0],&r[1],&r[2],&r[3]);     /* R, G, B, A */
>>         break;
>> @@ -1788,7 +1791,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>>         }
>>
>>         fetch_texel(mach->Samplers[unit],
>> -&r[0],&ZeroVec,&r[2], lod,&ZeroVec, /* S, T, P, LOD */
>> +&r[0],&ZeroVec,&r[2],&ZeroVec, lod, /* S, T, P, LOD */
>>                     control,
>>                     &r[0],&r[1],&r[2],&r[3]);     /* R, G, B, A */
>>         break;
>> @@ -1808,7 +1811,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>>         }
>>
>>         fetch_texel(mach->Samplers[unit],
>> -&r[0],&r[1],&r[2], lod,&ZeroVec,    /* S, T, P, LOD */
>> +&r[0],&r[1],&r[2],&ZeroVec, lod,    /* S, T, P, LOD */
>>                     control,
>>                     &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>>         break;
>> @@ -1822,7 +1825,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>>         }
>>
>>         fetch_texel(mach->Samplers[unit],
>> -&r[0],&r[1],&ZeroVec, lod,&ZeroVec,    /* S, T, P, LOD */
>> +&r[0],&r[1],&ZeroVec,&ZeroVec, lod,   /* S, T, P, LOD */
>>                     control,
>>                     &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>>         break;
>> @@ -1836,7 +1839,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>>         }
>>
>>         fetch_texel(mach->Samplers[unit],
>> -&r[0],&r[1],&r[2], lod,&ZeroVec,    /* S, T, P, LOD */
>> +&r[0],&r[1],&r[2],&ZeroVec, lod,   /* S, T, P, LOD */
>>                     control,
>>                     &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>>         break;
>> @@ -1852,7 +1855,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>>         }
>>
>>         fetch_texel(mach->Samplers[unit],
>> -&r[0],&r[1],&r[2], lod,&ZeroVec,    /* S, T, P, LOD */
>> +&r[0],&r[1],&r[2],&ZeroVec, lod,   /* S, T, P, LOD */
>>                     control,
>>                     &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>>         break;
>> @@ -1898,7 +1901,7 @@ exec_tex(struct tgsi_exec_machine *mach,
>>         }
>>
>>         fetch_texel(mach->Samplers[unit],
>> -&r[0],&r[1],&r[2], lod,&ZeroVec,
>> +&r[0],&r[1],&r[2],&ZeroVec, lod,
>>                     control,
>>                     &r[0],&r[1],&r[2],&r[3]);
>>         break;
>> @@ -2127,31 +2130,28 @@ exec_sample(struct tgsi_exec_machine *mach,
>>   {
>>      const uint resource_unit = inst->Src[1].Register.Index;
>>      const uint sampler_unit = inst->Src[2].Register.Index;
>> -   union tgsi_exec_channel r[4];
>> +   union tgsi_exec_channel r[4], c1;
>>      const union tgsi_exec_channel *lod =&ZeroVec;
>> -   enum tgsi_sampler_control control;
>> +   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
>>      uint chan;
>>
>>      assert(modifier != TEX_MODIFIER_PROJECTED);
>>
>>      if (modifier != TEX_MODIFIER_NONE) {
>>         if (modifier == TEX_MODIFIER_LOD_BIAS) {
>> -         FETCH(&r[3], 3, TGSI_CHAN_X);
>> -         lod =&r[3];
>> +         FETCH(&c1, 3, TGSI_CHAN_X);
>> +         lod =&c1;
>> +         control = tgsi_sampler_lod_bias;
>>         }
>>         else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
>> -         FETCH(&r[3], 0, TGSI_CHAN_W);
>> -         lod =&r[3];
>> +         FETCH(&c1, 0, TGSI_CHAN_W);
>> +         lod =&c1;
>> +         control = tgsi_sampler_lod_explicit;
>>         }
>> -      else
>> +      else {
>>            assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
>> -   }
>> -
>> -   if (modifier == TEX_MODIFIER_EXPLICIT_LOD ||
>> -       modifier == TEX_MODIFIER_LEVEL_ZERO) {
>> -      control = tgsi_sampler_lod_explicit;
>> -   } else {
>> -      control = tgsi_sampler_lod_bias;
>> +         control = tgsi_sampler_lod_zero;
>> +      }
>>      }
>>
>>      FETCH(&r[0], 0, TGSI_CHAN_X);
>> @@ -2161,13 +2161,13 @@ exec_sample(struct tgsi_exec_machine *mach,
>>         if (compare) {
>>            FETCH(&r[2], 3, TGSI_CHAN_X);
>>            fetch_texel(mach->Samplers[sampler_unit],
>> -&r[0],&ZeroVec,&r[2], lod,&ZeroVec,  /* S, T, P, LOD */
>> +&r[0],&ZeroVec,&r[2],&ZeroVec, lod, /* S, T, P, LOD */
>>                        control,
>>                        &r[0],&r[1],&r[2],&r[3]);     /* R, G, B, A */
>>         }
>>         else {
>>            fetch_texel(mach->Samplers[sampler_unit],
>> -&r[0],&ZeroVec,&ZeroVec, lod,&ZeroVec,  /* S, T, P, LOD */
>> +&r[0],&ZeroVec,&ZeroVec,&ZeroVec, lod, /* S, T, P, LOD */
>>                        control,
>>                        &r[0],&r[1],&r[2],&r[3]);     /* R, G, B, A */
>>         }
>> @@ -2180,13 +2180,13 @@ exec_sample(struct tgsi_exec_machine *mach,
>>         if (compare) {
>>            FETCH(&r[2], 3, TGSI_CHAN_X);
>>            fetch_texel(mach->Samplers[sampler_unit],
>> -&r[0],&r[1],&r[2], lod,&ZeroVec,     /* S, T, P, LOD */
>> +&r[0],&r[1],&r[2],&ZeroVec, lod,    /* S, T, P, LOD */
>>                        control,
>>                        &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>>         }
>>         else {
>>            fetch_texel(mach->Samplers[sampler_unit],
>> -&r[0],&r[1],&ZeroVec, lod,&ZeroVec,     /* S, T, P, LOD */
>> +&r[0],&r[1],&ZeroVec,&ZeroVec, lod,    /* S, T, P, LOD */
>>                        control,
>>                        &r[0],&r[1],&r[2],&r[3]);  /* outputs */
>>         }
>> @@ -2200,13 +2200,13 @@ exec_sample(struct tgsi_exec_machine *mach,
>>         if(compare) {
>>            FETCH(&r[3], 3, TGSI_CHAN_X);
>>            fetch_texel(mach->Samplers[sampler_unit],
>> -&r[0],&r[1],&r[2],&r[3],&ZeroVec,
>> +&r[0],&r[1],&r[2],&r[3], lod,
>>                        control,
>>                        &r[0],&r[1],&r[2],&r[3]);
>>         }
>>         else {
>>            fetch_texel(mach->Samplers[sampler_unit],
>> -&r[0],&r[1],&r[2], lod,&ZeroVec,
>> +&r[0],&r[1],&r[2],&ZeroVec, lod,
>>                        control,
>>                        &r[0],&r[1],&r[2],&r[3]);
>>         }
>> @@ -2217,12 +2217,6 @@ exec_sample(struct tgsi_exec_machine *mach,
>>         FETCH(&r[2], 0, TGSI_CHAN_Z);
>>         FETCH(&r[3], 0, TGSI_CHAN_W);
>>         if(compare) {
>> -         assert(modifier == TEX_MODIFIER_NONE);
>> -         /*
>> -          * FIXME: lod bias and explicit lod are prohibited but
>> -          * for sample_c_lz we pass the level zero info as explicit
>> -          * lod 0.
>> -          */
>>            FETCH(&r[4], 3, TGSI_CHAN_X);
>>            fetch_texel(mach->Samplers[sampler_unit],
>>                        &r[0],&r[1],&r[2],&r[3],&r[4],
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h
>> b/src/gallium/auxiliary/tgsi/tgsi_exec.h
>> index fbd28a2..911432a 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
>> @@ -89,8 +89,11 @@ struct tgsi_interp_coef
>>   };
>>
>>   enum tgsi_sampler_control {
>> +   tgsi_sampler_lod_none,
>>      tgsi_sampler_lod_bias,
>> -   tgsi_sampler_lod_explicit
>> +   tgsi_sampler_lod_explicit,
>> +   tgsi_sampler_lod_zero
>> +   /* FIXME: tgsi_sampler_derivs_explicit */
>>   };
>>
>>   /**
>> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c
>> b/src/gallium/drivers/softpipe/sp_tex_sample.c
>> index e9575da..60c1f92 100644
>> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
>> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
>> @@ -1647,15 +1647,77 @@ img_filter_3d_linear(struct tgsi_sampler
>> *tgsi_sampler,
>>    */
>>   static INLINE void
>>   compute_lod(const struct pipe_sampler_state *sampler,
>> +            enum tgsi_sampler_control control,
>>               const float biased_lambda,
>> -            const float lodbias[TGSI_QUAD_SIZE],
>> +            const float lod_in[TGSI_QUAD_SIZE],
>>               float lod[TGSI_QUAD_SIZE])
> 
> Could you update the comments on that function to explain all the
> different lod parameters?  It's kind of confusing.
Ok.

> 
> 
>>   {
>> +   float min_lod = sampler->min_lod;
>> +   float max_lod = sampler->max_lod;
>>      uint i;
>>
>> -   for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
>> -      lod[i] = biased_lambda + lodbias[i];
>> -      lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
>> +   if (control == tgsi_sampler_lod_none ||
>> +       control == tgsi_sampler_lod_zero) {
>> +      float lod_scalar = CLAMP(biased_lambda, min_lod, max_lod);
>> +      lod[0] = lod[1] = lod[2] = lod[3] = lod_scalar;
>> +   }
>> +   else if (control == tgsi_sampler_lod_bias) {
>> +      for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
>> +         lod[i] = biased_lambda + lod_in[i];
>> +         lod[i] = CLAMP(lod[i], min_lod, max_lod);
>> +      }
>> +   }
>> +   else {
>> +      assert(control == tgsi_sampler_lod_explicit);
>> +      for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
>> +         lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
>> +      }
>> +   }
> 
> Not a big deal, but I'd probably use a switch instead of if/else.
Yes, makes sense.

> 
> 
>> +}
>> +
>> +
>> +/* Calculate level of detail for every fragment.
> 
> What are the c0, c1 parameters?
Ah good catch. These were just the names as used in the sampling
interface. But since I changed it to always use the 5th parameter for
lod (c1) can rename that here. Plus c0 is now unused here so I'll fix that.


> 
> 
>> + */
>> +static INLINE void
>> +compute_lambda_lod(struct sp_sampler_variant *samp,
>> +                   const float s[TGSI_QUAD_SIZE],
>> +                   const float t[TGSI_QUAD_SIZE],
>> +                   const float p[TGSI_QUAD_SIZE],
>> +                   const float c0[TGSI_QUAD_SIZE],
>> +                   const float c1[TGSI_QUAD_SIZE],
>> +                   enum tgsi_sampler_control control,
>> +                   float lod[TGSI_QUAD_SIZE])
>> +{
>> +   const struct pipe_sampler_state *sampler = samp->sampler;
>> +   float lod_bias = sampler->lod_bias;
>> +   float min_lod = sampler->min_lod;
>> +   float max_lod = sampler->max_lod;
>> +   uint i;
>> +
>> +   if (control == tgsi_sampler_lod_none) {
>> +      float lambda = samp->compute_lambda(samp, s, t, p) + lod_bias;
>> +      float lod_scalar = CLAMP(lambda, min_lod, max_lod);
>> +      lod[0] = lod[1] = lod[2] = lod[3] = lod_scalar;
>> +   }
>> +   else if (control == tgsi_sampler_lod_bias) {
>> +      float lambda = samp->compute_lambda(samp, s, t, p) + lod_bias;
>> +
>> +      for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
>> +         lod[i] = lambda + c1[i];
>> +         lod[i] = CLAMP(lod[i], min_lod, max_lod);
>> +      }
>> +   }
>> +   else if (control == tgsi_sampler_lod_zero) {
>> +      /* this is all static state in the sampler really need clamp
>> here? */
>> +      float lod_scalar = CLAMP(lod_bias, min_lod, max_lod);
>> +      lod[0] = lod[1] = lod[2] = lod[3] = lod_scalar;
>> +   }
>> +   else {
>> +      assert(control == tgsi_sampler_lod_explicit);
>> +
>> +      for (i = 0; i<  TGSI_QUAD_SIZE; i++) {
>> +         lod[i] = CLAMP(c1[i], min_lod, max_lod);
>> +      }
>>      }
> 
> Another switch?
Yes.

> 
> 
>>   }
>>
>> @@ -1675,21 +1737,7 @@ mip_filter_linear(struct tgsi_sampler
>> *tgsi_sampler,
>>      int j;
>>      float lod[TGSI_QUAD_SIZE];
>>
>> -   if (control == tgsi_sampler_lod_bias) {
>> -      float lambda = samp->compute_lambda(samp, s, t, p) +
>> samp->sampler->lod_bias;
>> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
>> -         compute_lod(samp->sampler, lambda, c1, lod);
>> -      else
>> -         compute_lod(samp->sampler, lambda, c0, lod);
>> -   } else {
>> -      assert(control == tgsi_sampler_lod_explicit);
>> -
>> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
>> -         memcpy(lod, c1, sizeof(lod));
>> -      else
>> -         memcpy(lod, c0, sizeof(lod));
>> -
>> -   }
>> +   compute_lambda_lod(samp, s, t, p, c0, c1, control, lod);
>>
>>      for (j = 0; j<  TGSI_QUAD_SIZE; j++) {
>>         int level0 = samp->view->u.tex.first_level + (int)lod[j];
>> @@ -1744,20 +1792,7 @@ mip_filter_nearest(struct tgsi_sampler
>> *tgsi_sampler,
>>      float lod[TGSI_QUAD_SIZE];
>>      int j;
>>
>> -   if (control == tgsi_sampler_lod_bias) {
>> -      float lambda = samp->compute_lambda(samp, s, t, p) +
>> samp->sampler->lod_bias;
>> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
>> -         compute_lod(samp->sampler, lambda, c1, lod);
>> -      else
>> -         compute_lod(samp->sampler, lambda, c0, lod);
>> -   } else {
>> -      assert(control == tgsi_sampler_lod_explicit);
>> -
>> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
>> -         memcpy(lod, c1, sizeof(lod));
>> -      else
>> -         memcpy(lod, c0, sizeof(lod));
>> -   }
>> +   compute_lambda_lod(samp, s, t, p, c0, c1, control, lod);
>>
>>      for (j = 0; j<  TGSI_QUAD_SIZE; j++) {
>>         if (lod[j]<  0.0)
>> @@ -1791,20 +1826,7 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
>>      float lod[TGSI_QUAD_SIZE];
>>      int j;
>>
>> -   if (control == tgsi_sampler_lod_bias) {
>> -      float lambda = samp->compute_lambda(samp, s, t, p) +
>> samp->sampler->lod_bias;
>> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
>> -         compute_lod(samp->sampler, lambda, c1, lod);
>> -      else
>> -         compute_lod(samp->sampler, lambda, c0, lod);
>> -   } else {
>> -      assert(control == tgsi_sampler_lod_explicit);
>> -
>> -      if (samp->key.bits.target == PIPE_TEXTURE_CUBE_ARRAY)
>> -         memcpy(lod, c1, sizeof(lod));
>> -      else
>> -         memcpy(lod, c0, sizeof(lod));
>> -   }
>> +   compute_lambda_lod(samp, s, t, p, c0, c1, control, lod);
>>
>>      for (j = 0; j<  TGSI_QUAD_SIZE; j++) {
>>         if (lod[j]<  0.0) {
>> @@ -2077,7 +2099,8 @@ mip_filter_linear_aniso(struct tgsi_sampler
>> *tgsi_sampler,
>>      float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
>>      float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
>>
>> -   if (control == tgsi_sampler_lod_bias) {
>> +   if (control == tgsi_sampler_lod_bias ||
>> +       control == tgsi_sampler_lod_none) {
>>         /* note: instead of working with Px and Py, we will use the
>>          * squared length instead, to avoid sqrt.
>>          */
>> @@ -2114,12 +2137,12 @@ mip_filter_linear_aniso(struct tgsi_sampler
>> *tgsi_sampler,
>>          * this since 0.5*log(x) = log(sqrt(x))
>>          */
>>         lambda = 0.5F * util_fast_log2(Pmin2) + samp->sampler->lod_bias;
>> -      compute_lod(samp->sampler, lambda, c0, lod);
>> +      compute_lod(samp->sampler, control, lambda, c1, lod);
>>      }
>>      else {
>> -      assert(control == tgsi_sampler_lod_explicit);
>> -
>> -      memcpy(lod, c0, sizeof(lod));
>> +      assert(control == tgsi_sampler_lod_explicit ||
>> +             control == tgsi_sampler_lod_zero);
>> +      compute_lod(samp->sampler, control, samp->sampler->lod_bias,
>> c1, lod);
>>      }
>>
>>      /* XXX: Take into account all lod values.
>> @@ -2168,17 +2191,9 @@ mip_filter_linear_2d_linear_repeat_POT(
>>      struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
>>      const struct pipe_resource *texture = samp->view->texture;
>>      int j;
>> -   float lambda;
>>      float lod[TGSI_QUAD_SIZE];
>>
>> -   if (control == tgsi_sampler_lod_bias) {
>> -      lambda = samp->compute_lambda(samp, s, t, p) +
>> samp->sampler->lod_bias;
>> -      compute_lod(samp->sampler, lambda, c0, lod);
>> -   } else {
>> -      assert(control == tgsi_sampler_lod_explicit);
>> -
>> -      memcpy(lod, c0, sizeof(lod));
>> -   }
>> +   compute_lambda_lod(samp, s, t, p, c0, c1, control, lod);
>>
>>      for (j = 0; j<  TGSI_QUAD_SIZE; j++) {
>>         int level0 = samp->view->u.tex.first_level + (int)lod[j];
> 
> Looks good otherwise!
> 
> Reviewed-by: Brian Paul <brianp at vmware.com>