[Mesa-dev] [PATCH 2/5] tgsi: add support for new SQRT opcode

Marek Olšák maraeo at gmail.com
Fri Feb 1 17:39:06 PST 2013


The SQRT instruction is defined as TGSI_OUTPUT_REPLICATE (REPL in the
table) in this patch, but the implementation (exec_vector_unary with
micro_sqrt) looks like it's component-wise, which is much slower
(4 micro_sqrt calls instead of 1).

For example, r600g implements such instructions by computing
func(src.x) in a temporary register and then replicating the result in
the destination register.

The same applies to other instructions marked as REPL. tgsi_exec seems
to be implemented rather inefficiently.

Marek

On Fri, Feb 1, 2013 at 7:29 PM, Brian Paul <brianp at vmware.com> wrote:
> ---
>  src/gallium/auxiliary/tgsi/tgsi_exec.c       |   14 ++++++++++++++
>  src/gallium/auxiliary/tgsi/tgsi_exec.h       |    2 ++
>  src/gallium/auxiliary/tgsi/tgsi_info.c       |    2 +-
>  src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |    1 +
>  4 files changed, 18 insertions(+), 1 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 9f226c4..1220478 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -343,6 +343,16 @@ micro_rsq(union tgsi_exec_channel *dst,
>  }
>
>  static void
> +micro_sqrt(union tgsi_exec_channel *dst,
> +           const union tgsi_exec_channel *src)
> +{
> +   dst->f[0] = sqrtf(fabsf(src->f[0]));
> +   dst->f[1] = sqrtf(fabsf(src->f[1]));
> +   dst->f[2] = sqrtf(fabsf(src->f[2]));
> +   dst->f[3] = sqrtf(fabsf(src->f[3]));
> +}
> +
> +static void
>  micro_seq(union tgsi_exec_channel *dst,
>            const union tgsi_exec_channel *src0,
>            const union tgsi_exec_channel *src1)
> @@ -3562,6 +3572,10 @@ exec_instruction(
>        exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
>        break;
>
> +   case TGSI_OPCODE_SQRT:
> +      exec_vector_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
> +      break;
> +
>     case TGSI_OPCODE_DP2A:
>        exec_dp2a(mach, inst);
>        break;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> index fbd28a2..1a7d979 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> @@ -441,6 +441,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
>        return 1;
>     case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
>        return PIPE_MAX_SAMPLERS;
> +   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
> +      return 1;
>     default:
>        return 0;
>     }
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 458bc69..94b6f60 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -57,7 +57,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>     { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
>     { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
>     { 1, 3, 0, 0, 0, 0, COMP, "CND", TGSI_OPCODE_CND },
> -   { 0, 0, 0, 0, 0, 0, NONE, "", 20 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
>     { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
>     { 0, 0, 0, 0, 0, 0, NONE, "", 22 },      /* removed */
>     { 0, 0, 0, 0, 0, 0, NONE, "", 23 },      /* removed */
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
> index 96b864f..75e27a6 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
> @@ -60,6 +60,7 @@ OP13(MAD)
>  OP12(SUB)
>  OP13(LRP)
>  OP13(CND)
> +OP11(SQRT)
>  OP13(DP2A)
>  OP11(FRC)
>  OP13(CLAMP)
> --
> 1.7.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list