[Mesa-dev] [PATCH] gallium: add new LOD opcode

Thu Sep 28 18:22:58 UTC 2017

Am 28.09.2017 um 15:43 schrieb Jose Fonseca:
> On 28/09/17 02:46, sroland at vmware.com wrote:
>> From: Roland Scheidegger <sroland at vmware.com>
>>
>> The operation performed is all the same as LODQ, but with the usual
>> differences between dx10 and GL texture opcodes, that is separate
>> resource
>> and sampler indices (plus result swizzling, and setting z/w channels
>> to zero).
>> ---
>>   src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 14 ++++++++
>>   src/gallium/auxiliary/tgsi/tgsi_exec.c          | 48
>> ++++++++++++++++++++++---
>>   src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h  |  1 +
>>   src/gallium/docs/source/tgsi.rst                | 12 +++++++
>>   src/gallium/include/pipe/p_shader_tokens.h      |  4 ++-
>>   5 files changed, 74 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
>> index 3e47372..fe0068b 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
>> @@ -3285,6 +3285,18 @@ sviewinfo_emit(
>>      emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
>>   }
>>   +static void
>> +lod_emit(
>> +   const struct lp_build_tgsi_action * action,
>> +   struct lp_build_tgsi_context * bld_base,
>> +   struct lp_build_emit_data * emit_data)
>> +{
>> +   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
>> +
>> +   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
>> +               FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
>> +}
>> +
>>   static LLVMValueRef
>>   mask_vec(struct lp_build_tgsi_context *bld_base)
>>   {
>> @@ -3899,6 +3911,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
>>      bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
>>      bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
>>      bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit =
>> sviewinfo_emit;
>> +   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
>> +
>>        if (gs_iface) {
>>         /* There's no specific value for this because it should always
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> index 1264df0..4257a60 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> @@ -2340,15 +2340,22 @@ static void
>>   exec_lodq(struct tgsi_exec_machine *mach,
>>             const struct tgsi_full_instruction *inst)
>>   {
>> -   uint unit;
>> +   uint resource_unit, sampler_unit;
>>      int dim;
>>      int i;
>>      union tgsi_exec_channel coords[4];
>>      const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
>>      union tgsi_exec_channel r[2];
>>   -   unit = fetch_sampler_unit(mach, inst, 1);
>> -   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
>> +   resource_unit = fetch_sampler_unit(mach, inst, 1);
>> +   if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
>> +      uint target = mach->SamplerViews[resource_unit].Resource;
>> +      dim = tgsi_util_get_texture_coord_dim(target);
>> +      sampler_unit = fetch_sampler_unit(mach, inst, 2);
>> +   } else {
>> +      dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
>> +      sampler_unit = resource_unit;
>> +   }
>>      assert(dim <= ARRAY_SIZE(coords));
>>      /* fetch coordinates */
>>      for (i = 0; i < dim; i++) {
>> @@ -2358,7 +2365,7 @@ exec_lodq(struct tgsi_exec_machine *mach,
>>      for (i = dim; i < ARRAY_SIZE(coords); i++) {
>>         args[i] = &ZeroVec;
>>      }
>> -   mach->Sampler->query_lod(mach->Sampler, unit, unit,
>> +   mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
>>                               args[0]->f,
>>                               args[1]->f,
>>                               args[2]->f,
>> @@ -2375,6 +2382,35 @@ exec_lodq(struct tgsi_exec_machine *mach,
>>         store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
>>                    TGSI_EXEC_DATA_FLOAT);
>>      }
>> +   if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
>> +      unsigned char swizzles[4];
>> +      unsigned chan;
>> +      swizzles[0] = inst->Src[1].Register.SwizzleX;
>> +      swizzles[1] = inst->Src[1].Register.SwizzleY;
>> +      swizzles[2] = inst->Src[1].Register.SwizzleZ;
>> +      swizzles[3] = inst->Src[1].Register.SwizzleW;
>> +
>> +      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
>> +         if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
>> +            if (swizzles[chan] >= 2) {
>> +               store_dest(mach, &ZeroVec,
>> +                          &inst->Dst[0], inst, chan,
>> TGSI_EXEC_DATA_FLOAT);
>> +            } else {
>> +               store_dest(mach, &r[swizzles[chan]],
>> +                          &inst->Dst[0], inst, chan,
>> TGSI_EXEC_DATA_FLOAT);
>> +            }
>> +         }
>> +      }
>> +   } else {
>> +      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
>> +         store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
>> +                    TGSI_EXEC_DATA_FLOAT);
>> +      }
>> +      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
>> +         store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
>> +                    TGSI_EXEC_DATA_FLOAT);
>> +      }
>> +   }
>>   }
>>     static void
>> @@ -5705,6 +5741,10 @@ exec_instruction(
>>         assert(0);
>>         break;
>>   +   case TGSI_OPCODE_LOD:
>> +      exec_lodq(mach, inst);
>> +      break;
>> +
>>      case TGSI_OPCODE_UARL:
>>         exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT,
>> TGSI_EXEC_DATA_UINT);
>>         break;
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
>> b/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
>> index a4a9771..95a29fd 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
>> @@ -249,3 +249,4 @@ OPCODE(1, 2, COMP, U64DIV)
>>   OPCODE(1, 2, COMP, I64MOD)
>>   OPCODE(1, 2, COMP, U64MOD)
>>   OPCODE(1, 2, COMP, DDIV)
>> +OPCODE(1, 3, OTHR, LOD)
>> diff --git a/src/gallium/docs/source/tgsi.rst
>> b/src/gallium/docs/source/tgsi.rst
>> index 0bd9964..3f80785 100644
>> --- a/src/gallium/docs/source/tgsi.rst
>> +++ b/src/gallium/docs/source/tgsi.rst
>> @@ -2469,6 +2469,18 @@ after lookup.
>>     NOTE: no driver has implemented this opcode yet (and no state tracker
>>     emits it).  This information is subject to change.
>>   +.. opcode:: LOD - level of detail
>> +
>> +   Same syntax as the SAMPLE opcode but instead of performing an actual
>> +   texture lookup/filter, return the computed LOD information that the
>> +   texture pipe would use to access the texture. The Y component
>> contains
>> +   the computed LOD lambda_prime. The X component contains the LOD
>> that will
>> +   be accessed, based on min/max lod's and mipmap filters.
>> +   The Z and W components are set to 0.
>> +
>> +   Syntax: ``LOD dst, address, sampler_view, sampler``
>> +
>> +
>>   .. _resourceopcodes:
>>     Resource Access Opcodes
>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h
>> b/src/gallium/include/pipe/p_shader_tokens.h
>> index fa73054..0bb6739 100644
>> --- a/src/gallium/include/pipe/p_shader_tokens.h
>> +++ b/src/gallium/include/pipe/p_shader_tokens.h
>> @@ -607,7 +607,9 @@ struct tgsi_property_data {
>>     #define TGSI_OPCODE_DDIV                248
>>   -#define TGSI_OPCODE_LAST                249
>> +#define TGSI_OPCODE_LOD                 249
>> +
>> +#define TGSI_OPCODE_LAST                250
>>     /**
>>    * Opcode is the operation code to execute. A given operation
>> defines the
>>
> 
> Looks good to me.
> 
> Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
> 
> It would be nice to get rid of the DX10 non-DX10 texture opcode
> duplication.  Even if GL drivers don't care about this, they could
> easily still process the DX10 tokens -- with the guarantee that the same
> unit would be used both for resource and sampler.
> 
> Even if we don't get agreement to change gallium interface, we could
> have lp_bld_tgsi_action.c to convert DX9-style opcodes into DX10 style
> so the rest of the code doesn't need to handle both variations.  On the
> other hand, perhaps it's not worth the hassle, if we're not changing the
> interface.

I like the dx10 sample opcodes a lot more too. These old opcodes made
sense in dx9 days, and they use less regs (even disregarding the
additional sampler/resource reg) due to packing lod, comparison value
etc. into the same reg with the coordinates. But nowadays, with cube map
arrays, you now even need additional "special" opcodes because you can't
encode everything into the same reg anyway... That's just ugly.
But converting all the code (both state trackers and drivers) to use the
new ones isn't all that easy - it's not just the separate
sampler/resource regs but quite a bit more, e.g. where the comparison
information comes from.

Roland