[Mesa-dev] [PATCH 3/3] svga: implement TGSI_OPCODE_ROUND

Tue Jul 3 02:08:50 PDT 2012

Looks good AFAICT.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

Jose

----- Original Message -----
> ROUND and TRUNC are implemented with one function to reduce code
> duplication.
> ---
>  src/gallium/drivers/svga/svga_tgsi_insn.c |   69 ++++++++++++++++++++++-------
>  1 files changed, 53 insertions(+), 16 deletions(-)
> 
> diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
> index a2d6b51..eec40c7 100644
> --- a/src/gallium/drivers/svga/svga_tgsi_insn.c
> +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
> @@ -632,11 +632,11 @@ create_zero_immediate( struct svga_shader_emitter *emit )
>  {
>     unsigned idx = emit->nr_hw_float_const++;
>  
> -   /* Emit the constant (0, 0, -1, 1) and use swizzling to generate
> +   /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
>      * other useful vectors.
>      */
>     if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
> -                        idx, 0, 0, -1, 1 ))
> +                        idx, 0, 0.5, -1, 1 ))
>        return FALSE;
>  
>     emit->zero_immediate_idx = idx;
> @@ -732,6 +732,16 @@ get_pos_neg_one_immediate( struct svga_shader_emitter *emit )
>                    3, 3, 3, 2);
>  }
>  
> +/* returns {0.5, 0.5, 0.5, 0.5} immediate */
> +static INLINE struct src_register
> +get_half_immediate( struct svga_shader_emitter *emit )
> +{
> +   assert(emit->created_zero_immediate);
> +   assert(emit->zero_immediate_idx >= 0);
> +   return swizzle(src_register(SVGA3DREG_CONST, emit->zero_immediate_idx),
> +                  1, 1, 1, 1);
> +}
> +
>  /* returns the loop const */
>  static INLINE struct src_register
>  get_loop_const( struct svga_shader_emitter *emit )
> @@ -2376,34 +2386,57 @@ static boolean emit_log(struct svga_shader_emitter *emit,
>  
>  
>  /**
> - * Translate TGSI TRUNC instruction.
> + * Translate TGSI TRUNC or ROUND instruction.
>   * We need to truncate toward zero. Ex: trunc(-1.9) = -1
>   * Different approaches are needed for VS versus PS.
>   */
>  static boolean
> -emit_trunc(struct svga_shader_emitter *emit,
> -           const struct tgsi_full_instruction *insn)
> +emit_trunc_round(struct svga_shader_emitter *emit,
> +                 const struct tgsi_full_instruction *insn,
> +                 boolean round)
>  {
>     SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
>     const struct src_register src0 =
>        translate_src_register(emit, &insn->Src[0] );
>     SVGA3dShaderDestToken t1 = get_temp(emit);
>  
> -   /* t1 = fract(abs(src0)) */
> -   if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
> -      return FALSE;
> +   if (round) {
> +      SVGA3dShaderDestToken t0 = get_temp(emit);
> +      struct src_register half = get_half_immediate(emit);
>  
> -   /* t1 = abs(src0) - t1 */
> -   if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
> -                   negate(src(t1))))
> -      return FALSE;
> +      /* t0 = abs(src0) + 0.5 */
> +      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
> +                      absolute(src0), half))
> +         return FALSE;
> +
> +      /* t1 = fract(t0) */
> +      if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
> +         return FALSE;
> +
> +      /* t1 = t0 - t1 */
> +      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
> +                      negate(src(t1))))
> +         return FALSE;
> +   }
> +   else {
> +      /* trunc */
> +
> +      /* t1 = fract(abs(src0)) */
> +      if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
> +         return FALSE;
> +
> +      /* t1 = abs(src0) - t1 */
> +      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
> +                      negate(src(t1))))
> +         return FALSE;
> +   }
>  
>     /*
>      * Now we need to multiply t1 by the sign of the original value.
>     */
>     if (emit->unit == PIPE_SHADER_VERTEX) {
>        /* For VS: use SGN instruction */
> -      /* Need another temp plus two extra/dummy registers */
> +      /* Need two extra/dummy registers: */
>        SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
>           t4 = get_temp(emit);
>  
> @@ -2519,7 +2552,12 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
>        return emit_floor( emit, insn );
>  
>     case TGSI_OPCODE_TRUNC:
> -      return emit_trunc( emit, insn );
> +      return emit_trunc_round( emit, insn, FALSE );
> +
> +   case TGSI_OPCODE_ROUNDEVEN:
> +      /* Note: ROUNDEVEN not properly implemented yet; falls through to ROUND (halves round away from zero) */
> +   case TGSI_OPCODE_ROUND:
> +      return emit_trunc_round( emit, insn, TRUE );
>  
>     case TGSI_OPCODE_CEIL:
>        return emit_ceil( emit, insn );
> @@ -2612,8 +2650,6 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
>         * about:
>         */
>     case TGSI_OPCODE_CLAMP:
> -   case TGSI_OPCODE_ROUND:
> -   case TGSI_OPCODE_ROUNDEVEN:
>     case TGSI_OPCODE_AND:
>     case TGSI_OPCODE_OR:
>     case TGSI_OPCODE_I2F:
> @@ -3102,6 +3138,7 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
>         emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
>         emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
>         emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
> +       emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
>         emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
>         emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
>         emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
> --
> 1.7.3.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>