[PATCH] etnaviv: Generate new sin/cos instructions on GC3000

Christian Gmeiner christian.gmeiner at gmail.com
Tue Jan 31 07:47:10 UTC 2017


Hi Wladimir,


2017-01-30 15:33 GMT+01:00 Wladimir J. van der Laan <laanwj at gmail.com>:
> On Wed, Nov 30, 2016 at 10:12:03AM +0100, Wladimir J. van der Laan wrote:
>> Shaders using sin/cos instructions were not working on GC3000.
>>
>> The reason for this turns out to be that these chips implement sin/cos
>> in a different way (but using the same opcodes):
>>
>> - Need their input scaled by 1/pi instead of 2/pi.
>>
>> - Output an x and y component, which need to be multiplied to
>>   get the result.
>>
>> - tex_amode needs to be set to 1.
>>
>> Add a new bit to the compiler specs and generate these instructions
>> as necessary.
>
> I forgot, so hereby:
>
> Signed-off-by: Wladimir J. van der Laan <laanwj at gmail.com>
>

What about the other review comments? Please have a look at them and
send a new version of this patch. The current version has too many
changes that are not related to sin/cos for GC3000 - sorry.

>> ---
>>  src/gallium/drivers/etnaviv/etnaviv_compiler.c | 46 +++++++++++++++++++++++---
>>  src/gallium/drivers/etnaviv/etnaviv_internal.h |  7 +++-
>>  src/gallium/drivers/etnaviv/etnaviv_screen.c   |  2 ++
>>  3 files changed, 50 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
>> index de03c52..3614304 100644
>> --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
>> +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
>> @@ -171,8 +171,10 @@ struct etna_compile {
>>     /* Temporary register for use within translated TGSI instruction,
>>      * only allocated when needed.
>>      */
>> -   int inner_temps; /* number of inner temps used; only up to one available at
>> -                       this point */
>> +   int inner_temps; /* number of inner temps used; only up to two available at */
>> +                    /* this point. Beware that one may be used internally to */
>> +                    /* accommodate an uniform for an instruction that gets passed */
>> +                    /* two uniforms as inputs. */
>>     struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];
>>
>>     /* Fields for handling nested conditionals */
>> @@ -734,7 +736,8 @@ etna_compile_pass_optimize_outputs(struct etna_compile *c)
>>
>>  /* Get a temporary to be used within one TGSI instruction.
>>   * The first time that this function is called the temporary will be allocated.
>> - * Each call to this function will return the same temporary.
>> + * Each call to this function for a different instruction will return the same
>> + * temporaries.
>>   */
>>  static struct etna_native_reg
>>  etna_compile_get_inner_temp(struct etna_compile *c)
>> @@ -1474,7 +1477,42 @@ static void
>>  trans_trig(const struct instr_translater *t, struct etna_compile *c,
>>             const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
>>  {
>> -   if (c->specs->has_sin_cos_sqrt) {
>> +   if (c->specs->has_new_sin_cos) { /* Alternative SIN/COS */
>> +      /* On newer chips alternative SIN/COS instructions are implemented,
>> +       * which:
>> +       * - Need their input scaled by 1/pi instead of 2/pi
>> +       * - Output an x and y component, which need to be multiplied to
>> +       *   get the result
>> +       */
>> +      /* TGSI lowering should deal with SCS */
>> +      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
>> +
>> +      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
>> +      emit_inst(c, &(struct etna_inst) {
>> +         .opcode = INST_OPCODE_MUL,
>> +         .sat = 0,
>> +         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
>> +         .src[0] = src[0], /* any swizzling happens here */
>> +         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
>> +      });
>> +      emit_inst(c, &(struct etna_inst) {
>> +         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
>> +                    ? INST_OPCODE_COS
>> +                    : INST_OPCODE_SIN,
>> +         .sat = 0,
>> +         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
>> +         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
>> +         .tex = { .amode=1 }, /* Unknown bit needs to be set */
>> +      });
>> +      emit_inst(c, &(struct etna_inst) {
>> +         .opcode = INST_OPCODE_MUL,
>> +         .sat = inst->Instruction.Saturate,
>> +         .dst = convert_dst(c, &inst->Dst[0]),
>> +         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
>> +         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
>> +      });
>> +
>> +   } else if (c->specs->has_sin_cos_sqrt) {
>>        /* TGSI lowering should deal with SCS */
>>        assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
>>
>> diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h b/src/gallium/drivers/etnaviv/etnaviv_internal.h
>> index f340116..9495fe9 100644
>> --- a/src/gallium/drivers/etnaviv/etnaviv_internal.h
>> +++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h
>> @@ -58,7 +58,10 @@
>>  #define ETNA_SE_CLIP_MARGIN_RIGHT (0xffff)
>>  #define ETNA_SE_CLIP_MARGIN_BOTTOM (0xffff)
>>
>> -/* GPU chip 3D specs */
>> +/* GPU chip 3D specs.
>> + * This structure is passed to the compiler to determine code generation
>> + * parameters.
>> + */
>>  struct etna_specs {
>>     /* supports SUPERTILE (64x64) tiling? */
>>     unsigned can_supertile : 1;
>> @@ -70,6 +73,8 @@ struct etna_specs {
>>     unsigned has_sign_floor_ceil : 1;
>>     /* can use VS_RANGE, PS_RANGE registers*/
>>     unsigned has_shader_range_registers : 1;
>> +   /* has the new sin/cos functions */
>> +   unsigned has_new_sin_cos : 1;
>>     /* can use any kind of wrapping mode on npot textures */
>>     unsigned npot_tex_any_wrap;
>>     /* number of bits per TS tile */
>> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
>> index bc5488b..d632fd0 100644
>> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
>> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
>> @@ -611,6 +611,8 @@ etna_get_specs(struct etna_screen *screen)
>>        screen->model >= 0x1000 || screen->model == 0x880;
>>     screen->specs.npot_tex_any_wrap =
>>        VIV_FEATURE(screen, chipMinorFeatures1, NON_POWER_OF_TWO);
>> +   screen->specs.has_new_sin_cos =
>> +      VIV_FEATURE(screen, chipMinorFeatures3, HAS_FAST_TRANSCENDENTALS);
>>
>>     if (instruction_count > 256) { /* unified instruction memory? */
>>        screen->specs.vs_offset = 0xC000;
>> --
>> 2.7.4
>>

greets
--
Christian Gmeiner, MSc

https://www.youtube.com/user/AloryOFFICIAL
https://soundcloud.com/christian-gmeiner


More information about the etnaviv mailing list