[Mesa-dev] [PATCH] gallium: add double opcodes and TGSI execution (v2.1)
Roland Scheidegger
sroland at vmware.com
Mon Dec 29 14:14:48 PST 2014
Just minor nits, looks good to me otherwise.
I agree with the others that the round family of functions should probably
be added too (though that could be done in another patch). Maybe we could
have one cap bit then, to distinguish implementations that only do what's
required by sm5 (and hence are missing things like round and rsq) from
everybody else, who can do everything.
Roland
Am 23.12.2014 um 22:50 schrieb Dave Airlie:
> This patch adds support for a set of double opcodes
> to TGSI. It is an update of work done originally
> by Michal Krol on the gallium-double-opcodes branch.
>
> The opcodes have a hint where they came from in the
> header file.
>
> v2: add unsigned/int <-> double
> v2.1: update docs.
> This is based on code by Michal Krol <michal at vmware.com>
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/auxiliary/tgsi/tgsi_exec.c | 743 ++++++++++++++++++++++++++++-
> src/gallium/auxiliary/tgsi/tgsi_info.c | 24 +-
> src/gallium/docs/source/tgsi.rst | 76 ++-
> src/gallium/include/pipe/p_shader_tokens.h | 26 +-
> 4 files changed, 850 insertions(+), 19 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 834568b..6af4730 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -72,6 +72,16 @@
> #define TILE_BOTTOM_LEFT 2
> #define TILE_BOTTOM_RIGHT 3
>
> +union tgsi_double_channel {
> + double d[TGSI_QUAD_SIZE];
> + unsigned u[TGSI_QUAD_SIZE][2];
> +};
> +
> +struct tgsi_double_vector {
> + union tgsi_double_channel xy;
> + union tgsi_double_channel zw;
> +};
> +
> static void
> micro_abs(union tgsi_exec_channel *dst,
> const union tgsi_exec_channel *src)
> @@ -147,6 +157,55 @@ micro_cos(union tgsi_exec_channel *dst,
> }
>
> static void
> +micro_d2f(union tgsi_exec_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->f[0] = (float)src->d[0];
> + dst->f[1] = (float)src->d[1];
> + dst->f[2] = (float)src->d[2];
> + dst->f[3] = (float)src->d[3];
> +}
> +
> +static void
> +micro_d2i(union tgsi_exec_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->i[0] = (int)src->d[0];
> + dst->i[1] = (int)src->d[1];
> + dst->i[2] = (int)src->d[2];
> + dst->i[3] = (int)src->d[3];
> +}
> +
> +static void
> +micro_d2u(union tgsi_exec_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->u[0] = (unsigned)src->d[0];
> + dst->u[1] = (unsigned)src->d[1];
> + dst->u[2] = (unsigned)src->d[2];
> + dst->u[3] = (unsigned)src->d[3];
> +}
> +static void
> +micro_dabs(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
> + dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
> + dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
> + dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
> +}
> +
> +static void
> +micro_dadd(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = src[0].d[0] + src[1].d[0];
> + dst->d[1] = src[0].d[1] + src[1].d[1];
> + dst->d[2] = src[0].d[2] + src[1].d[2];
> + dst->d[3] = src[0].d[3] + src[1].d[3];
> +}
> +
> +static void
> micro_ddx(union tgsi_exec_channel *dst,
> const union tgsi_exec_channel *src)
> {
> @@ -167,6 +226,159 @@ micro_ddy(union tgsi_exec_channel *dst,
> }
>
> static void
> +micro_ddiv(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = src[0].d[0] / src[1].d[0];
> + dst->d[1] = src[0].d[1] / src[1].d[1];
> + dst->d[2] = src[0].d[2] / src[1].d[2];
> + dst->d[3] = src[0].d[3] / src[1].d[3];
> +}
> +
> +static void
> +micro_dmul(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = src[0].d[0] * src[1].d[0];
> + dst->d[1] = src[0].d[1] * src[1].d[1];
> + dst->d[2] = src[0].d[2] * src[1].d[2];
> + dst->d[3] = src[0].d[3] * src[1].d[3];
> +}
> +
> +static void
> +micro_dmax(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
> + dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
> + dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
> + dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
> +}
> +
> +static void
> +micro_dmin(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
> + dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
> + dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
> + dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
> +}
> +
> +static void
> +micro_dneg(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = -src->d[0];
> + dst->d[1] = -src->d[1];
> + dst->d[2] = -src->d[2];
> + dst->d[3] = -src->d[3];
> +}
> +
> +static void
> +micro_dslt(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
> + dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
> + dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
> + dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_dsne(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
> + dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
> + dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
> + dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_dsge(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + // fprintf(stderr, "%f %f\n", src[0].d[0], src[1].d[0]);
> + dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
> + dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
> + dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
> + dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_dseq(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
> + dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
> + dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
> + dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_drcp(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = 1.0 / src->d[0];
> + dst->d[1] = 1.0 / src->d[1];
> + dst->d[2] = 1.0 / src->d[2];
> + dst->d[3] = 1.0 / src->d[3];
> +}
> +
> +static void
> +micro_dsqrt(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = sqrt(src->d[0]);
> + dst->d[1] = sqrt(src->d[1]);
> + dst->d[2] = sqrt(src->d[2]);
> + dst->d[3] = sqrt(src->d[3]);
> +}
> +
> +static void
> +micro_dmad(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
> + dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
> + dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
> + dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
> +}
> +
> +static void
> +micro_dfrac(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = src->d[0] - floor(src->d[0]);
> + dst->d[1] = src->d[1] - floor(src->d[1]);
> + dst->d[2] = src->d[2] - floor(src->d[2]);
> + dst->d[3] = src->d[3] - floor(src->d[3]);
> +}
> +
> +static void
> +micro_dldexp(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src0,
> + union tgsi_exec_channel *src1)
> +{
> + dst->d[0] = ldexp(src0->d[0], src1->i[1]);
> + dst->d[1] = ldexp(src0->d[1], src1->i[1]);
> + dst->d[2] = ldexp(src0->d[2], src1->i[2]);
> + dst->d[3] = ldexp(src0->d[3], src1->i[3]);
> +}
> +
> +static void
> +micro_dfracexp(union tgsi_double_channel *dst,
> + union tgsi_exec_channel *dst_exp,
> + const union tgsi_double_channel *src)
> +{
> + dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
> + dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
> + dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
> + dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
> +}
> +
> +static void
> micro_exp2(union tgsi_exec_channel *dst,
> const union tgsi_exec_channel *src)
> {
> @@ -201,6 +413,16 @@ micro_exp2(union tgsi_exec_channel *dst,
> }
>
> static void
> +micro_f2d(union tgsi_double_channel *dst,
> + const union tgsi_exec_channel *src)
> +{
> + dst->d[0] = (double)src->f[0];
> + dst->d[1] = (double)src->f[1];
> + dst->d[2] = (double)src->f[2];
> + dst->d[3] = (double)src->f[3];
> +}
> +
> +static void
> micro_flr(union tgsi_exec_channel *dst,
> const union tgsi_exec_channel *src)
> {
> @@ -221,6 +443,16 @@ micro_frc(union tgsi_exec_channel *dst,
> }
>
> static void
> +micro_i2d(union tgsi_double_channel *dst,
> + const union tgsi_exec_channel *src)
> +{
> + dst->d[0] = (double)src->i[0];
> + dst->d[1] = (double)src->i[1];
> + dst->d[2] = (double)src->i[2];
> + dst->d[3] = (double)src->i[3];
> +}
> +
> +static void
> micro_iabs(union tgsi_exec_channel *dst,
> const union tgsi_exec_channel *src)
> {
> @@ -449,11 +681,21 @@ micro_trunc(union tgsi_exec_channel *dst,
> dst->f[3] = (float)(int)src->f[3];
> }
>
> +static void
> +micro_u2d(union tgsi_double_channel *dst,
> + const union tgsi_exec_channel *src)
> +{
> + dst->d[0] = (double)src->u[0];
> + dst->d[1] = (double)src->u[1];
> + dst->d[2] = (double)src->u[2];
> + dst->d[3] = (double)src->u[3];
> +}
>
> enum tgsi_exec_datatype {
> TGSI_EXEC_DATA_FLOAT,
> TGSI_EXEC_DATA_INT,
> - TGSI_EXEC_DATA_UINT
> + TGSI_EXEC_DATA_UINT,
> + TGSI_EXEC_DATA_DOUBLE,
> };
>
> /*
> @@ -1090,11 +1332,11 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach,
> }
>
> static void
> -fetch_source(const struct tgsi_exec_machine *mach,
> - union tgsi_exec_channel *chan,
> - const struct tgsi_full_src_register *reg,
> - const uint chan_index,
> - enum tgsi_exec_datatype src_datatype)
> +fetch_source_d(const struct tgsi_exec_machine *mach,
I think the _d in the name is a bit misleading here, since this fetches
any type, not just doubles. Unless the _d stands for something else...
> + union tgsi_exec_channel *chan,
> + const struct tgsi_full_src_register *reg,
> + const uint chan_index,
> + enum tgsi_exec_datatype src_datatype, bool dtype)
> {
> union tgsi_exec_channel index;
> union tgsi_exec_channel index2D;
> @@ -1238,6 +1480,9 @@ fetch_source(const struct tgsi_exec_machine *mach,
> &index2D,
> chan);
>
> + if (dtype)
> + return;
> +
> if (reg->Register.Absolute) {
> if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
> micro_abs(chan, chan);
> @@ -1256,12 +1501,22 @@ fetch_source(const struct tgsi_exec_machine *mach,
> }
>
> static void
> -store_dest(struct tgsi_exec_machine *mach,
> - const union tgsi_exec_channel *chan,
> - const struct tgsi_full_dst_register *reg,
> - const struct tgsi_full_instruction *inst,
> - uint chan_index,
> - enum tgsi_exec_datatype dst_datatype)
> +fetch_source(const struct tgsi_exec_machine *mach,
> + union tgsi_exec_channel *chan,
> + const struct tgsi_full_src_register *reg,
> + const uint chan_index,
> + enum tgsi_exec_datatype src_datatype)
> +{
> + fetch_source_d(mach, chan, reg, chan_index, src_datatype, false);
> +}
> +
> +static void
> +store_dest_optsat(struct tgsi_exec_machine *mach,
> + const union tgsi_exec_channel *chan,
> + const struct tgsi_full_dst_register *reg,
> + const struct tgsi_full_instruction *inst,
> + uint chan_index,
> + enum tgsi_exec_datatype dst_datatype, bool sat)
> {
> uint i;
> union tgsi_exec_channel null;
> @@ -1471,6 +1726,14 @@ store_dest(struct tgsi_exec_machine *mach,
> }
> }
>
> + if (!sat) {
> + /* doubles path */
> + for (i = 0; i < TGSI_QUAD_SIZE; i++)
> + if (execmask & (1 << i))
> + dst->i[i] = chan->i[i];
> + return;
> + }
> +
> switch (inst->Instruction.Saturate) {
> case TGSI_SAT_NONE:
> for (i = 0; i < TGSI_QUAD_SIZE; i++)
> @@ -1505,8 +1768,20 @@ store_dest(struct tgsi_exec_machine *mach,
> default:
> assert( 0 );
> }
> +
> }
>
> +static void
> +store_dest(struct tgsi_exec_machine *mach,
> + const union tgsi_exec_channel *chan,
> + const struct tgsi_full_dst_register *reg,
> + const struct tgsi_full_instruction *inst,
> + uint chan_index,
> + enum tgsi_exec_datatype dst_datatype)
> +{
> + store_dest_optsat(mach, chan, reg, inst, chan_index,
> + dst_datatype, true);
> +}
> #define FETCH(VAL,INDEX,CHAN)\
> fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
>
> @@ -2980,6 +3255,358 @@ exec_endswitch(struct tgsi_exec_machine *mach)
> UPDATE_EXEC_MASK(mach);
> }
>
> +typedef void (* micro_dop)(union tgsi_double_channel *dst,
> + const union tgsi_double_channel *src);
> +
> +static void
> +fetch_double_channel(struct tgsi_exec_machine *mach,
> + union tgsi_double_channel *chan,
> + const struct tgsi_full_src_register *reg,
> + uint chan_0,
> + uint chan_1)
> +{
> + union tgsi_exec_channel src[2];
> + uint i;
> +
> + /* XXX: That won't work for operand modifiers.
> + */
> + fetch_source_d(mach, &src[0], reg, chan_0, TGSI_EXEC_DATA_UINT, true);
> + fetch_source_d(mach, &src[1], reg, chan_1, TGSI_EXEC_DATA_UINT, true);
> +
> + for (i = 0; i < TGSI_QUAD_SIZE; i++) {
> + chan->u[i][0] = src[0].u[i];
> + chan->u[i][1] = src[1].u[i];
> + }
> + if (reg->Register.Absolute) {
> + micro_dabs(chan, chan);
> + }
> + if (reg->Register.Negate) {
> + micro_dneg(chan, chan);
> + }
> +
> +}
> +
> +static void
> +store_double_channel(struct tgsi_exec_machine *mach,
> + const union tgsi_double_channel *chan,
> + const struct tgsi_full_dst_register *reg,
> + const struct tgsi_full_instruction *inst,
> + uint chan_0,
> + uint chan_1)
> +{
> + union tgsi_exec_channel dst[2];
> + uint i;
> + union tgsi_double_channel temp;
> + const uint execmask = mach->ExecMask;
> + switch (inst->Instruction.Saturate) {
> + case TGSI_SAT_NONE:
> + for (i = 0; i < TGSI_QUAD_SIZE; i++)
> + if (execmask & (1 << i)) {
> + dst[0].u[i] = chan->u[i][0];
> + dst[1].u[i] = chan->u[i][1];
> + }
> + break;
> +
> + case TGSI_SAT_ZERO_ONE:
> + for (i = 0; i < TGSI_QUAD_SIZE; i++)
> + if (execmask & (1 << i)) {
> + if (chan->d[i] < 0.0f)
> + temp.d[i] = 0.0;
> + else if (chan->d[i] > 1.0f)
> + temp.d[i] = 1.0;
> + else
> + temp.d[i] = chan->d[i];
> +
> + dst[0].u[i] = temp.u[i][0];
> + dst[1].u[i] = temp.u[i][1];
> + }
> + break;
> +
> + case TGSI_SAT_MINUS_PLUS_ONE:
> + for (i = 0; i < TGSI_QUAD_SIZE; i++)
> + if (execmask & (1 << i)) {
> + if (chan->d[i] < -1.0)
> + temp.d[i] = -1.0;
> + else if (chan->d[i] > 1.0)
> + temp.d[i] = 1.0;
> + else
> + temp.d[i] = chan->d[i];
> +
> + dst[0].u[i] = temp.u[i][0];
> + dst[1].u[i] = temp.u[i][1];
> + }
> + break;
> +
> + default:
> + assert( 0 );
> + }
> +
> + /* XXX: That won't work for saturate modifiers.
> + */
> + store_dest_optsat(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT, false);
> + if (chan_1 != -1)
> + store_dest_optsat(mach, &dst[1], reg, inst, chan_1, TGSI_EXEC_DATA_UINT, false);
> +}
> +
> +static void
> +exec_double_unary(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst,
> + micro_dop op)
> +{
> + union tgsi_double_channel src;
> + union tgsi_double_channel dst;
> +
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
> + op(&dst, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
> + }
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
> + op(&dst, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
> + }
> +}
> +
> +static void
> +exec_double_binary(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst,
> + micro_dop op,
> + enum tgsi_exec_datatype dst_datatype)
> +{
> + union tgsi_double_channel src[2];
> + union tgsi_double_channel dst;
> + int first_dest_chan, second_dest_chan;
> + int wmask;
> +
> + wmask = inst->Dst[0].Register.WriteMask;
> + if (wmask & TGSI_WRITEMASK_XY) {
> + first_dest_chan = TGSI_CHAN_X;
> + second_dest_chan = TGSI_CHAN_Y;
> + if (dst_datatype == TGSI_EXEC_DATA_UINT) {
> + first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
> + second_dest_chan = -1;
> + }
> +
> + fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
> + fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
> + op(&dst, src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
> + }
> +
> + if (wmask & TGSI_WRITEMASK_ZW) {
> + first_dest_chan = TGSI_CHAN_Z;
> + second_dest_chan = TGSI_CHAN_W;
> + if (dst_datatype == TGSI_EXEC_DATA_UINT) {
> + first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
> + second_dest_chan = -1;
> + }
> +
> + fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
> + fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
> + op(&dst, src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
> + }
> +}
> +
> +static void
> +exec_double_trinary(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst,
> + micro_dop op)
> +{
> + union tgsi_double_channel src[3];
> + union tgsi_double_channel dst;
> +
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
> + fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
> + fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
> + fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
> + op(&dst, src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
> + }
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
> + fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
> + fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
> + fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
> + op(&dst, src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
> + }
> +}
> +
> +static void
> +exec_f2d(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst)
> +{
> + union tgsi_exec_channel src;
> + union tgsi_double_channel dst;
> +
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
> + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
> + micro_f2d(&dst, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
> + }
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
> + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
> + micro_f2d(&dst, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
> + }
> +}
> +
> +static void
> +exec_d2f(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst)
> +{
> + union tgsi_double_channel src;
> + union tgsi_exec_channel dst;
> + int wm = inst->Dst[0].Register.WriteMask;
> + int i;
> + int bit;
> + for (i = 0; i < 2; i++) {
> + bit = ffs(wm);
> + if (bit) {
> + wm &= ~(1 << (bit - 1));
> + if (i == 0)
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
> + else
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
> + micro_d2f(&dst, &src);
> + store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_FLOAT);
> + }
> + }
> +}
> +
> +static void
> +exec_i2d(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst)
> +{
> + union tgsi_exec_channel src;
> + union tgsi_double_channel dst;
> +
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
> + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
> + micro_i2d(&dst, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
> + }
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
> + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_INT);
> + micro_i2d(&dst, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
> + }
> +}
> +
> +static void
> +exec_d2i(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst)
> +{
> + union tgsi_double_channel src;
> + union tgsi_exec_channel dst;
> + int wm = inst->Dst[0].Register.WriteMask;
> + int i;
> + int bit;
> + for (i = 0; i < 2; i++) {
> + bit = ffs(wm);
> + if (bit) {
> + wm &= ~(1 << (bit - 1));
> + if (i == 0)
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
> + else
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
> + micro_d2i(&dst, &src);
> + store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_INT);
> + }
> + }
> +}
> +static void
> +exec_u2d(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst)
> +{
> + union tgsi_exec_channel src;
> + union tgsi_double_channel dst;
> +
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
> + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
> + micro_u2d(&dst, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
> + }
> + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
> + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_UINT);
> + micro_u2d(&dst, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
> + }
> +}
> +
> +static void
> +exec_d2u(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst)
> +{
> + union tgsi_double_channel src;
> + union tgsi_exec_channel dst;
> + int wm = inst->Dst[0].Register.WriteMask;
> + int i;
> + int bit;
> + for (i = 0; i < 2; i++) {
> + bit = ffs(wm);
> + if (bit) {
> + wm &= ~(1 << (bit - 1));
> + if (i == 0)
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
> + else
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
> + micro_d2u(&dst, &src);
> + store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_UINT);
> + }
> + }
> +}
> +
> +static void
> +exec_dldexp(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst)
> +{
> + union tgsi_double_channel src0;
> + union tgsi_exec_channel src1;
> + union tgsi_double_channel dst;
> + int wmask;
> +
> + wmask = inst->Dst[0].Register.WriteMask;
> + if (wmask & TGSI_WRITEMASK_XY) {
> +
> + fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
> + fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
> + micro_dldexp(&dst, &src0, &src1);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
> + }
> +
> + if (wmask & TGSI_WRITEMASK_ZW) {
> + fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
> + fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
> + micro_dldexp(&dst, &src0, &src1);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
> + }
> +}
> +
> +static void
> +exec_dfracexp(struct tgsi_exec_machine *mach,
> + const struct tgsi_full_instruction *inst)
> +{
> + union tgsi_double_channel src;
> + union tgsi_double_channel dst;
> + union tgsi_exec_channel dst_exp;
> +
> + if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
> + micro_dfracexp(&dst, &dst_exp, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
> + store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
> + }
> + if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
> + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
> + micro_dfracexp(&dst, &dst_exp, &src);
> + store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
> + store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
> + }
> +}
> +
> +
> static void
> micro_i2f(union tgsi_exec_channel *dst,
> const union tgsi_exec_channel *src)
> @@ -4336,6 +4963,98 @@ exec_instruction(
> case TGSI_OPCODE_UMSB:
> exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
> break;
> +
> + case TGSI_OPCODE_F2D:
> + exec_f2d(mach, inst);
> + break;
> +
> + case TGSI_OPCODE_D2F:
> + exec_d2f(mach, inst);
> + break;
> +
> + case TGSI_OPCODE_DABS:
> + exec_double_unary(mach, inst, micro_dabs);
> + break;
> +
> + case TGSI_OPCODE_DNEG:
> + exec_double_unary(mach, inst, micro_dneg);
> + break;
> +
> + case TGSI_OPCODE_DADD:
> + exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
> + break;
> +
> + case TGSI_OPCODE_DDIV:
> + exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
> + break;
> +
> + case TGSI_OPCODE_DMUL:
> + exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
> + break;
> +
> + case TGSI_OPCODE_DMAX:
> + exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
> + break;
> +
> + case TGSI_OPCODE_DMIN:
> + exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
> + break;
> +
> + case TGSI_OPCODE_DSLT:
> + exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
> + break;
> +
> + case TGSI_OPCODE_DSGE:
> + exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
> + break;
> +
> + case TGSI_OPCODE_DSEQ:
> + exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
> + break;
> +
> + case TGSI_OPCODE_DSNE:
> + exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
> + break;
> +
> + case TGSI_OPCODE_DRCP:
> + exec_double_unary(mach, inst, micro_drcp);
> + break;
> +
> + case TGSI_OPCODE_DSQRT:
> + exec_double_unary(mach, inst, micro_dsqrt);
> + break;
> +
> + case TGSI_OPCODE_DMAD:
> + exec_double_trinary(mach, inst, micro_dmad);
> + break;
> +
> + case TGSI_OPCODE_DFRAC:
> + exec_double_unary(mach, inst, micro_dfrac);
> + break;
> +
> + case TGSI_OPCODE_DLDEXP:
> + exec_dldexp(mach, inst);
> + break;
> +
> + case TGSI_OPCODE_DFRACEXP:
> + exec_dfracexp(mach, inst);
> + break;
> +
> + case TGSI_OPCODE_I2D:
> + exec_i2d(mach, inst);
> + break;
> +
> + case TGSI_OPCODE_D2I:
> + exec_d2i(mach, inst);
> + break;
> +
> + case TGSI_OPCODE_U2D:
> + exec_u2d(mach, inst);
> + break;
> +
> + case TGSI_OPCODE_D2U:
> + exec_d2u(mach, inst);
> + break;
> default:
> assert( 0 );
> }
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index c90d24c..05c5cb5 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -231,10 +231,32 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
> { 1, 1, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB },
> { 1, 1, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB },
> { 1, 1, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB },
> -
> { 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
> { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
> { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
> + { 1, 1, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D },
> + { 1, 1, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F },
> + { 1, 1, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS },
> + { 1, 1, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG },
> + { 1, 2, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD },
> + { 1, 2, 0, 0, 0, 0, COMP, "DDIV", TGSI_OPCODE_DDIV },
> + { 1, 2, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL },
> + { 1, 2, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX },
> + { 1, 2, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN },
> + { 1, 2, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT },
> + { 1, 2, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE },
> + { 1, 2, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ },
> + { 1, 2, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE },
> + { 1, 1, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP },
> + { 1, 1, 0, 0 ,0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT },
> + { 1, 3, 0, 0 ,0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD },
> + { 1, 1, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC},
> + { 1, 2, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP},
> + { 2, 1, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP},
> + { 1, 1, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I },
> + { 1, 1, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D },
> + { 1, 1, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U },
> + { 1, 1, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
> };
>
> const struct tgsi_opcode_info *
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index ff322e8..20b0102 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -1810,7 +1810,10 @@ Double ISA
> The double-precision opcodes reinterpret four-component vectors into
> two-component vectors with doubled precision in each component.
>
> -Support for these opcodes is XXX undecided. :T
> +.. opcode:: DABS - Absolute
> +
> + dst.xy = |src0.xy|
> + dst.zw = |src0.zw|
>
> .. opcode:: DADD - Add
>
> @@ -1833,17 +1836,33 @@ Support for these opcodes is XXX undecided. :T
>
> .. math::
>
> - dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F
> + dst.x = src0.xy == src1.xy ? \sim 0 : 0
> +
> + dst.z = src0.zw == src1.zw ? \sim 0 : 0
> +
> +.. opcode:: DSNE - Set on Equal
This should read "Set on Not Equal".
> +
> +.. math::
> +
> + dst.x = src0.xy != src1.xy ? \sim 0 : 0
>
> - dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
> + dst.z = src0.zw != src1.zw ? \sim 0 : 0
>
> .. opcode:: DSLT - Set on Less than
>
> .. math::
>
> - dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F
> + dst.x = src0.xy < src1.xy ? \sim 0 : 0
> +
> + dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: DSGE - Set on Greater equal
> +
> +.. math::
>
> - dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
> + dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> +
> + dst.z = src0.zw >= src1.zw ? \sim 0 : 0
>
> .. opcode:: DFRAC - Fraction
>
> @@ -1930,7 +1949,54 @@ This opcode is the inverse of :opcode:`DFRACEXP`.
>
> dst.zw = \sqrt{src.zw}
>
> +.. opcode:: F2D - Float to Double
> +
> +.. math::
> +
> + dst.xy = double(src0.x)
> +
> + dst.zw = double(src0.z)
> +
> +.. opcode:: D2F - Double to Float
> +
> +.. math::
> +
> + dst.x = float(src0.xy)
> +
> + dst.z = float(src0.zw)
> +
> +.. opcode:: I2D - Int to Double
> +
> +.. math::
> +
> + dst.xy = double(src0.x)
> +
> + dst.zw = double(src0.z)
> +
> +.. opcode:: D2I - Double to Int
> +
> +.. math::
> +
> + dst.x = int(src0.xy)
> +
> + dst.z = int(src0.zw)
> +
> +.. opcode:: U2D - Unsigned Int to Double
> +
> +.. math::
> +
> + dst.xy = double(src0.x)
> +
> + dst.zw = double(src0.z)
> +
> +.. opcode:: D2U - Double to Unsigned Int
> +
> +.. math::
>
> + dst.x = unsigned(src0.xy)
> +
> + dst.z = unsigned(src0.zw)
> +
> .. _samplingopcodes:
>
> Resource Sampling Opcodes
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 442b67b..970e168 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -494,7 +494,31 @@ struct tgsi_property_data {
> #define TGSI_OPCODE_INTERP_SAMPLE 193
> #define TGSI_OPCODE_INTERP_OFFSET 194
>
> -#define TGSI_OPCODE_LAST 195
> +/* sm5 marked opcodes are supported in D3D11 optionally - also DMOV, DMOVC */
> +#define TGSI_OPCODE_F2D 195 /* SM5 */
> +#define TGSI_OPCODE_D2F 196
> +#define TGSI_OPCODE_DABS 197
> +#define TGSI_OPCODE_DNEG 198 /* SM5 */
> +#define TGSI_OPCODE_DADD 199 /* SM5 */
> +#define TGSI_OPCODE_DDIV 200 /* SM5 */
> +#define TGSI_OPCODE_DMUL 201 /* SM5 */
> +#define TGSI_OPCODE_DMAX 202 /* SM5 */
> +#define TGSI_OPCODE_DMIN 203 /* SM5 */
> +#define TGSI_OPCODE_DSLT 204 /* SM5 */
> +#define TGSI_OPCODE_DSGE 205 /* SM5 */
> +#define TGSI_OPCODE_DSEQ 206 /* SM5 */
> +#define TGSI_OPCODE_DSNE 207 /* SM5 */
> +#define TGSI_OPCODE_DRCP 208 /* eg, cayman */
> +#define TGSI_OPCODE_DSQRT 209 /* eg, cayman also has DRSQ */
> +#define TGSI_OPCODE_DMAD 210 /* DFMA? */
> +#define TGSI_OPCODE_DFRAC 211 /* eg, cayman */
> +#define TGSI_OPCODE_DLDEXP 212 /* eg, cayman */
> +#define TGSI_OPCODE_DFRACEXP 213 /* eg, cayman */
> +#define TGSI_OPCODE_D2I 214
> +#define TGSI_OPCODE_I2D 215
> +#define TGSI_OPCODE_D2U 216
> +#define TGSI_OPCODE_U2D 217
> +#define TGSI_OPCODE_LAST 218
>
> #define TGSI_SAT_NONE 0 /* do not saturate */
> #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */
>
More information about the mesa-dev
mailing list