[Mesa-dev] [PATCH] gallivm: hook up dx10 sampling opcodes

Fri Feb 1 14:02:59 PST 2013

Am 01.02.2013 21:00, schrieb Brian Paul:
> On 02/01/2013 12:39 PM, sroland at vmware.com wrote:
>> From: Roland Scheidegger<sroland at vmware.com>
>>
>> They are similar to old-style tex opcodes but with separate sampler and
>> texture units (and other arguments in different places).
>> Also adjust the debug tgsi dump code.
>> ---
>>   src/gallium/auxiliary/gallivm/lp_bld_tgsi.h      |    6 +-
>>   src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c |  115 ++++++++-
>>   src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c  |  291
>> ++++++++++++++++++++++
>>   3 files changed, 406 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
>> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
>> index 4898849..5d6b737 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
>> @@ -68,7 +68,8 @@ enum lp_build_tex_modifier {
>>      LP_BLD_TEX_MODIFIER_PROJECTED,
>>      LP_BLD_TEX_MODIFIER_LOD_BIAS,
>>      LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
>> -   LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
>> +   LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
>> +   LP_BLD_TEX_MODIFIER_LZ
> 
> Could you put a comment on the last one to say what LZ means?
Sure. Better yet, I'll rename it to LOD_ZERO.

> 
> 
>>   };
>>
>>
>> @@ -104,7 +105,8 @@ struct lp_tgsi_texture_info
>>   {
>>      struct lp_tgsi_channel_info coord[4];
>>      unsigned target:8; /* TGSI_TEXTURE_* */
>> -   unsigned unit:8;  /* Sampler unit */
>> +   unsigned sampler_unit:8;  /* Sampler unit */
>> +   unsigned texture_unit:8;  /* Texture unit */
>>      unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
> 
> Maybe we could just use ubyte instead of bitfields?
There's more bitfiled usage around there so this looks more consistent.
That said, using 8 bits for all of these seems arbitrary,
target would only require 5 bits, sampler_unit 4, texture_unit 7 (for
dx10 limits) and modifier only 3... At least for target/modifier I'm not
sure why they are using 8 bits.

> 
> 
>>   };
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
>> index 1c54ef9..2a445d6 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
>> @@ -140,14 +140,104 @@ analyse_tex(struct analysis_context *ctx,
>>         if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
>>            /* We don't track explicit derivatives, although we could */
>>            indirect = TRUE;
>> -         tex_info->unit = inst->Src[3].Register.Index;
>> +         tex_info->sampler_unit = inst->Src[3].Register.Index;
>> +         tex_info->texture_unit = inst->Src[3].Register.Index;
>>         }  else {
>>            if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED ||
>>                modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
>>                modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
>>               readmask |= TGSI_WRITEMASK_W;
>>            }
>> -         tex_info->unit = inst->Src[1].Register.Index;
>> +         tex_info->sampler_unit = inst->Src[1].Register.Index;
>> +         tex_info->texture_unit = inst->Src[1].Register.Index;
>> +      }
>> +
>> +      for (chan = 0; chan<  4; ++chan) {
>> +         struct lp_tgsi_channel_info *chan_info =&tex_info->coord[chan];
>> +         if (readmask&  (1<<  chan)) {
>> +            analyse_src(ctx, chan_info,&inst->Src[0].Register, chan);
>> +            if (chan_info->file != TGSI_FILE_INPUT) {
>> +               indirect = TRUE;
>> +            }
>> +         } else {
>> +            memset(chan_info, 0, sizeof *chan_info);
>> +         }
>> +      }
>> +
>> +      if (indirect) {
>> +         info->indirect_textures = TRUE;
>> +      }
>> +
>> +      ++info->num_texs;
>> +   } else {
>> +      info->indirect_textures = TRUE;
>> +   }
>> +}
>> +
>> +
>> +static void
>> +analyse_sample(struct analysis_context *ctx,
>> +               const struct tgsi_full_instruction *inst,
>> +               enum lp_build_tex_modifier modifier)
> 
> It would be good to have a comment on this function (and analyse_tex())
> to say something about what they do.
Ok.

> 
> 
>> +{
>> +   struct lp_tgsi_info *info = ctx->info;
>> +   unsigned chan;
>> +
>> +   if (info->num_texs<  Elements(info->tex)) {
>> +      struct lp_tgsi_texture_info *tex_info =&info->tex[info->num_texs];
>> +      boolean indirect = FALSE;
>> +      boolean shadow = FALSE;
>> +      unsigned readmask = 0;
>> +
>> +      tex_info->target = inst->Texture.Texture;
>> +      switch (inst->Texture.Texture) {
>> +      case TGSI_TEXTURE_SHADOW1D:
>> +         shadow = TRUE;
>> +         /* Fallthrough */
>> +      case TGSI_TEXTURE_1D:
>> +         readmask = TGSI_WRITEMASK_X;
>> +         break;
>> +      case TGSI_TEXTURE_SHADOW1D_ARRAY:
>> +      case TGSI_TEXTURE_SHADOW2D:
>> +      case TGSI_TEXTURE_SHADOWRECT:
>> +         shadow = TRUE;
>> +         /* Fallthrough */
>> +      case TGSI_TEXTURE_1D_ARRAY:
>> +      case TGSI_TEXTURE_2D:
>> +      case TGSI_TEXTURE_RECT:
>> +         readmask = TGSI_WRITEMASK_XY;
>> +         break;
>> +      case TGSI_TEXTURE_SHADOW2D_ARRAY:
>> +      case TGSI_TEXTURE_SHADOWCUBE:
>> +         shadow = TRUE;
>> +         /* Fallthrough */
>> +      case TGSI_TEXTURE_2D_ARRAY:
>> +      case TGSI_TEXTURE_3D:
>> +      case TGSI_TEXTURE_CUBE:
>> +         readmask = TGSI_WRITEMASK_XYZ;
>> +         break;
>> +      case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
>> +         shadow = TRUE;
>> +         /* Fallthrough */
>> +      case TGSI_TEXTURE_CUBE_ARRAY:
>> +         readmask = TGSI_WRITEMASK_XYZW;
>> +         break;
>> +      default:
>> +         assert(0);
>> +         return;
>> +      }
>> +
>> +      tex_info->texture_unit = inst->Src[1].Register.Index;
>> +      tex_info->sampler_unit = inst->Src[2].Register.Index;
>> +
>> +      if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV ||
>> +          modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || shadow) {
>> +         /* We don't track insts with additional regs, although we
>> could */
>> +         indirect = TRUE;
>> +      }  else {
>> +         if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
>> +            readmask |= TGSI_WRITEMASK_W;
>> +         }
>>         }
>>
>>         for (chan = 0; chan<  4; ++chan) {
>> @@ -229,6 +319,22 @@ analyse_instruction(struct analysis_context *ctx,
>>         case TGSI_OPCODE_TXP:
>>            analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
>>            break;
>> +      case TGSI_OPCODE_SAMPLE:
>> +      case TGSI_OPCODE_SAMPLE_C:
>> +         analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_NONE);
>> +         break;
>> +      case TGSI_OPCODE_SAMPLE_C_LZ:
>> +         analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_LZ);
>> +         break;
>> +      case TGSI_OPCODE_SAMPLE_D:
>> +         analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
>> +         break;
>> +      case TGSI_OPCODE_SAMPLE_B:
>> +         analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
>> +         break;
>> +      case TGSI_OPCODE_SAMPLE_L:
>> +         analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
>> +         break;
>>         default:
>>            break;
>>         }
>> @@ -355,8 +461,9 @@ dump_info(const struct tgsi_token *tokens,
>>               debug_printf(" _");
>>            }
>>         }
>> -      debug_printf(", SAMP[%u], %s\n",
>> -                   tex_info->unit,
>> +      debug_printf(", RES[%u], SAMP[%u], %s\n",
>> +                   tex_info->texture_unit,
>> +                   tex_info->sampler_unit,
>>                      tgsi_texture_names[tex_info->target]);
>>      }
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
>> index 0621fb4..c6801ac 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
>> @@ -1340,6 +1340,197 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>>   }
>>
>>   static void
>> +emit_sample(struct lp_build_tgsi_soa_context *bld,
>> +            const struct tgsi_full_instruction *inst,
>> +            enum lp_build_tex_modifier modifier,
>> +            LLVMValueRef *texel)
>> +{
>> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
>> +   unsigned texture_unit, sampler_unit;
>> +   LLVMValueRef lod_bias, explicit_lod;
>> +   LLVMValueRef coords[4];
>> +   LLVMValueRef offsets[3] = { NULL };
>> +   struct lp_derivatives derivs;
>> +   unsigned num_coords, dims;
>> +   unsigned i;
>> +   boolean compare = FALSE;
>> +
>> +   if (!bld->sampler) {
>> +      _debug_printf("warning: found texture instruction but no
>> sampler generator supplied\n");
>> +      for (i = 0; i<  4; i++) {
>> +         texel[i] = bld->bld_base.base.undef;
>> +      }
>> +      return;
>> +   }
>> +
>> +   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
>> +   derivs.ddx_ddy[1] = bld->bld_base.base.undef;
>> +
>> +   switch (inst->Texture.Texture) {
>> +   case TGSI_TEXTURE_SHADOW1D:
>> +      compare = TRUE;
>> +      /* Fallthrough */
>> +   case TGSI_TEXTURE_1D:
>> +      num_coords = 1;
>> +      dims = 1;
>> +      break;
>> +   case TGSI_TEXTURE_SHADOW1D_ARRAY:
>> +      compare = TRUE;
>> +      /* Fallthrough */
>> +   case TGSI_TEXTURE_1D_ARRAY:
>> +      num_coords = 2;
>> +      dims = 1;
>> +      break;
>> +   case TGSI_TEXTURE_SHADOW2D:
>> +   case TGSI_TEXTURE_SHADOWRECT:
>> +      compare = TRUE;
>> +      /* Fallthrough */
>> +   case TGSI_TEXTURE_2D:
>> +   case TGSI_TEXTURE_RECT:
>> +      num_coords = 2;
>> +      dims = 2;
>> +      break;
>> +   case TGSI_TEXTURE_SHADOW2D_ARRAY:
>> +   case TGSI_TEXTURE_SHADOWCUBE:
>> +      compare = TRUE;
>> +      /* Fallthrough */
>> +   case TGSI_TEXTURE_2D_ARRAY:
>> +   case TGSI_TEXTURE_CUBE:
>> +      num_coords = 3;
>> +      dims = 2;
>> +      break;
>> +   case TGSI_TEXTURE_3D:
>> +      num_coords = 3;
>> +      dims = 3;
>> +      break;
>> +   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
>> +      compare = TRUE;
>> +      /* Fallthrough */
>> +   case TGSI_TEXTURE_CUBE_ARRAY:
>> +      num_coords = 4;
>> +      dims = 3;
>> +      break;
>> +   default:
>> +      assert(0);
>> +      return;
>> +   }
>> +
>> +   /* unlike tex opcodes the sampler/texture unit always come from
>> same reg */
>> +   texture_unit = inst->Src[1].Register.Index;
>> +   sampler_unit = inst->Src[2].Register.Index;
> 
> The comment and code don't seem to agree there.
> 
> [...]
Yes this needs reformulating. (The idea was that tex_unit is always
taken from reg 1, sampler_unit from reg 2 - unlike the old texture
opcodes where sampler doesn't have a fixed location but is last src reg
used instead.)

> 
> 
> Otherwise looks good AFAICT.
> 
> Reviewed-by: Brian Paul <brianp at vmware.com>