[virglrenderer-devel] [PATCH 08/12] arb_gpu_shader5: add support for interpolation instructions

Thu May 17 01:54:05 UTC 2018

On 16 May 2018 at 13:42, Gurchetan Singh <gurchetansingh at chromium.org> wrote:
> On Mon, May 14, 2018 at 9:38 PM Dave Airlie <airlied at gmail.com> wrote:
>
>> From: Dave Airlie <airlied at redhat.com>
>
>> ---
>>     src/vrend_shader.c | 95
> +++++++++++++++++++++++++++++++++++++++++++++---------
>>     src/vrend_shader.h |  1 +
>>     2 files changed, 80 insertions(+), 16 deletions(-)
>
>> diff --git a/src/vrend_shader.c b/src/vrend_shader.c
>> index 2585155..2161766 100644
>> --- a/src/vrend_shader.c
>> +++ b/src/vrend_shader.c
>> @@ -37,7 +37,7 @@ extern int vrend_dump_shaders;
>
>>     /* start convert of tgsi to glsl */
>
>> -#define INTERP_PREFIX "               "
>> +#define INTERP_PREFIX "                           "
>>     #define INVARI_PREFIX "invariant"
>
>>     struct vrend_shader_io {
>> @@ -153,6 +153,7 @@ struct dump_ctx {
>>        bool uses_sample_shading;
>>        bool uses_gpu_shader5;
>>        bool write_mul_temp;
>> +   bool write_interp_temp;
>>     };
>
>>     static inline const char *tgsi_proc_to_prefix(int shader_type)
>> @@ -272,6 +273,7 @@ iter_declaration(struct tgsi_iterate_context *iter,
>>           ctx->inputs[i].name = decl->Semantic.Name;
>>           ctx->inputs[i].sid = decl->Semantic.Index;
>>           ctx->inputs[i].interpolate = decl->Interp.Interpolate;
>> +      ctx->inputs[i].centroid = decl->Interp.Location ==
> TGSI_INTERPOLATE_LOC_CENTROID;
>>           ctx->inputs[i].first = decl->Range.First;
>>           ctx->inputs[i].glsl_predefined_no_emit = false;
>>           ctx->inputs[i].glsl_no_index = false;
>> @@ -1632,6 +1634,7 @@ iter_instruction(struct tgsi_iterate_context *iter,
>>        bool override_no_wm[4];
>>        bool dst_override_no_wm[2];
>>        char *sret;
>> +   char interpSrc0[255], interpSwizzle0[10];
>>        int ret;
>>        bool tg4_has_component = false;
>>        if (ctx->prog_type == -1)
>> @@ -1812,8 +1815,18 @@ iter_instruction(struct tgsi_iterate_context *iter,
>>                       if (stype == TGSI_TYPE_UNSIGNED &&
>>                           ctx->inputs[j].is_int)
>>                          srcstypeprefix = "";
>> -                  snprintf(srcs[i], 255, "%s(%s%s%s%s)",
>> -                           srcstypeprefix, prefix,
> ctx->inputs[j].glsl_name, arrayname, ctx->inputs[j].is_int ? "" : swizzle);
>> +
>> +                  if (inst->Instruction.Opcode ==
> TGSI_OPCODE_INTERP_SAMPLE && i == 1) {
>> +                     snprintf(srcs[i], 255, "floatBitsToInt(%s%s%s%s)",
> prefix, ctx->inputs[j].glsl_name, arrayname, swizzle);
>> +                  }
>
>
>
>
>
>
> else
>> +                     snprintf(srcs[i], 255, "%s(%s%s%s%s)",
> srcstypeprefix, prefix, ctx->inputs[j].glsl_name, arrayname,
> ctx->inputs[j].is_int ? "" : swizzle);
>> +               }
>> +               if ((inst->Instruction.Opcode ==
> TGSI_OPCODE_INTERP_SAMPLE ||
>> +                    inst->Instruction.Opcode ==
> TGSI_OPCODE_INTERP_OFFSET ||
>> +                    inst->Instruction.Opcode ==
> TGSI_OPCODE_INTERP_CENTROID) &&
>> +                   i == 0) {
>> +                  snprintf(interpSrc0, 255, "%s",
> ctx->inputs[j].glsl_name);
>> +                  snprintf(interpSwizzle0, 10, "%s", swizzle);
>>                    }
>>                    override_no_wm[i] = ctx->inputs[j].override_no_wm;
>>                    break;
>> @@ -1823,6 +1836,11 @@ iter_instruction(struct tgsi_iterate_context *iter,
>>              struct vrend_temp_range *range = find_temp_range(ctx,
> src->Register.Index);
>>              if (!range)
>>                 return FALSE;
>> +         if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i
> == 1) {
>> +            stprefix = true;
>> +            stypeprefix = "floatBitsToInt";
>> +         }
>> +
>>              if (src->Register.Indirect) {
>>                 snprintf(srcs[i], 255, "%s%c%stemp%d[addr0 + %d]%s%c",
> stypeprefix, stprefix ? '(' : ' ', prefix, range->first,
> src->Register.Index - range->first, swizzle, stprefix ? ')' : ' ');
>>              } else
>> @@ -1839,7 +1857,9 @@ iter_instruction(struct tgsi_iterate_context *iter,
>>              } else {
>>                 const char *csp;
>>                 ctx->has_ints = true;
>> -            if (stype == TGSI_TYPE_FLOAT || stype == TGSI_TYPE_UNTYPED)
>> +            if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE &&
> i == 1)
>> +               csp = "ivec4";
>> +            else if (stype == TGSI_TYPE_FLOAT || stype ==
> TGSI_TYPE_UNTYPED)
>>                    csp = "uintBitsToFloat";
>>                 else if (stype == TGSI_TYPE_SIGNED)
>>                    csp = "ivec4";
>> @@ -1866,7 +1886,8 @@ iter_instruction(struct tgsi_iterate_context *iter,
>>              const char *vtype = "vec4";
>>              const char *imm_stypeprefix = stypeprefix;
>
>> -         if ((inst->Instruction.Opcode == TGSI_OPCODE_TG4 && i == 1))
>> +         if ((inst->Instruction.Opcode == TGSI_OPCODE_TG4 && i == 1) ||
>> +             (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i
> == 1))
>>                 stype = TGSI_TYPE_SIGNED;
>
>>              if (imd->type == TGSI_IMM_UINT32 || imd->type ==
> TGSI_IMM_INT32) {
>> @@ -2343,6 +2364,30 @@ iter_instruction(struct tgsi_iterate_context *iter,
>>           EMIT_BUF_WITH_RET(ctx, buf);
>>           break;
>>        }
>> +   case TGSI_OPCODE_INTERP_CENTROID:
>> +      snprintf(buf, 255, "interp_temp = interpolateAtCentroid(%s);\n",
> interpSrc0);
>> +      EMIT_BUF_WITH_RET(ctx, buf);
>> +      snprintf(buf, 255, "%s = %s(%s(interp_temp%s));\n", dsts[0],
> dstconv, dtypeprefix, interpSwizzle0);
>
> Why is an interp_temp needed?  Also, shouldn't the destination write-mask
> also work (that's what's used for other functions):
>
> %s = %s(%s(interpolateAtCentroid(%s)%s));\n", dsts[0], dstconv,
> dtypeprefix, srcs[0], writemask);
>
> If that doesn't work, it's probably better to name it src_swizzle[0] if
> other instructions need that information.

GLSL interpolateAt* instructions are special in that they have some constraints:

- they have to take an input (not a temp or other intermediate variable)
- the input size must match the output size, no swizzling or writemasks

So we have to do the GLSL call first (hence interp_temp, which holds the
unswizzled result), then convert the result to match what TGSI wants.

Dave.