[Mesa-dev] [PATCH 2/5] tgsi: add support for new SQRT opcode
Marek Olšák
maraeo at gmail.com
Fri Feb 1 17:39:06 PST 2013
This patch defines the SQRT instruction as TGSI_OUTPUT_REPLICATE (REPL in
the opcode table), but the implementation appears to be component-wise
(it is dispatched through exec_vector_unary), which is much slower: sqrt
is computed for each of the 4 components instead of once.
For example, r600g implements such instructions by computing
func(src.x) in a temporary register and then replicating the result in
the destination register.
The same applies to other instructions marked as REPL. tgsi_exec seems
to be implemented rather inefficiently.
Marek
On Fri, Feb 1, 2013 at 7:29 PM, Brian Paul <brianp at vmware.com> wrote:
> ---
> src/gallium/auxiliary/tgsi/tgsi_exec.c | 14 ++++++++++++++
> src/gallium/auxiliary/tgsi/tgsi_exec.h | 2 ++
> src/gallium/auxiliary/tgsi/tgsi_info.c | 2 +-
> src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 +
> 4 files changed, 18 insertions(+), 1 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 9f226c4..1220478 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -343,6 +343,16 @@ micro_rsq(union tgsi_exec_channel *dst,
> }
>
> static void
> +micro_sqrt(union tgsi_exec_channel *dst,
> + const union tgsi_exec_channel *src)
> +{
> + dst->f[0] = sqrtf(fabsf(src->f[0]));
> + dst->f[1] = sqrtf(fabsf(src->f[1]));
> + dst->f[2] = sqrtf(fabsf(src->f[2]));
> + dst->f[3] = sqrtf(fabsf(src->f[3]));
> +}
> +
> +static void
> micro_seq(union tgsi_exec_channel *dst,
> const union tgsi_exec_channel *src0,
> const union tgsi_exec_channel *src1)
> @@ -3562,6 +3572,10 @@ exec_instruction(
> exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
> break;
>
> + case TGSI_OPCODE_SQRT:
> + exec_vector_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
> + break;
> +
> case TGSI_OPCODE_DP2A:
> exec_dp2a(mach, inst);
> break;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> index fbd28a2..1a7d979 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> @@ -441,6 +441,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
> return 1;
> case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
> return PIPE_MAX_SAMPLERS;
> + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
> + return 1;
> default:
> return 0;
> }
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 458bc69..94b6f60 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -57,7 +57,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
> { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
> { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
> { 1, 3, 0, 0, 0, 0, COMP, "CND", TGSI_OPCODE_CND },
> - { 0, 0, 0, 0, 0, 0, NONE, "", 20 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
> { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
> { 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
> { 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
> index 96b864f..75e27a6 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
> @@ -60,6 +60,7 @@ OP13(MAD)
> OP12(SUB)
> OP13(LRP)
> OP13(CND)
> +OP11(SQRT)
> OP13(DP2A)
> OP11(FRC)
> OP13(CLAMP)
> --
> 1.7.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list