[Mesa-dev] [PATCH 04/11] tgsi: clarify the semantics of DFRACEXP
Nicolai Hähnle
nhaehnle at gmail.com
Sat Sep 16 11:23:46 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
The status quo is quite the mess:
1. tgsi_exec will do a per-channel computation, and store the dst[0]
result (significand) correctly for each channel. The dst[1] result
(exponent) will be written to the first bit set in the writemask.
So per-component calculation only works partially.
2. r600 will only do a single computation. It will replicate the
exponent but not the significand.
3. The docs pretend that there's per-component calculation, but they
even get dst[0] and dst[1] confused.
4. Luckily, st_glsl_to_tgsi only ever emits single-component instructions,
and kind-of assumes that everything is replicated, generating this for
the dvec4 case:
DFRACEXP TEMP[0].xy, TEMP[1].x, CONST[0][0].xyxy
DFRACEXP TEMP[0].zw, TEMP[1].y, CONST[0][0].zwzw
DFRACEXP TEMP[2].xy, TEMP[1].z, CONST[0][1].xyxy
DFRACEXP TEMP[2].zw, TEMP[1].w, CONST[0][1].zwzw
Settle on the simplest behavior, which is single-component calculation
with replication, document it, and adjust tgsi_exec and r600.
---
src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 +++++++---------
src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h | 2 +-
src/gallium/docs/source/tgsi.rst | 10 ++++------
src/gallium/drivers/r600/r600_shader.c | 14 ++++++++------
4 files changed, 20 insertions(+), 22 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 1264df0c622..2a47f5dfaef 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3681,31 +3681,29 @@ exec_dldexp(struct tgsi_exec_machine *mach,
}
static void
exec_dfracexp(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
union tgsi_double_channel src;
union tgsi_double_channel dst;
union tgsi_exec_channel dst_exp;
- if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- micro_dfracexp(&dst, &dst_exp, &src);
+ fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+ micro_dfracexp(&dst, &dst_exp, &src);
+ if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
- store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
- }
- if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_dfracexp(&dst, &dst_exp, &src);
+ if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
- store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
+ for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[1].Register.WriteMask & (1 << chan))
+ store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT);
}
}
static void
exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
micro_dop_sop op)
{
union tgsi_double_channel src0;
union tgsi_exec_channel src1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h b/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
index a4a97711750..3f39afe2196 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
@@ -205,21 +205,21 @@ OPCODE(1, 2, COMP, DMAX)
OPCODE(1, 2, COMP, DMIN)
OPCODE(1, 2, COMP, DSLT)
OPCODE(1, 2, COMP, DSGE)
OPCODE(1, 2, COMP, DSEQ)
OPCODE(1, 2, COMP, DSNE)
OPCODE(1, 1, COMP, DRCP)
OPCODE(1, 1, COMP, DSQRT)
OPCODE(1, 3, COMP, DMAD)
OPCODE(1, 1, COMP, DFRAC)
OPCODE(1, 2, COMP, DLDEXP)
-OPCODE(2, 1, COMP, DFRACEXP)
+OPCODE(2, 1, REPL, DFRACEXP)
OPCODE(1, 1, COMP, D2I)
OPCODE(1, 1, COMP, I2D)
OPCODE(1, 1, COMP, D2U)
OPCODE(1, 1, COMP, U2D)
OPCODE(1, 1, COMP, DRSQ)
OPCODE(1, 1, COMP, DTRUNC)
OPCODE(1, 1, COMP, DCEIL)
OPCODE(1, 1, COMP, DFLR)
OPCODE(1, 1, COMP, DROUND)
OPCODE(1, 1, COMP, DSSG)
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 8633c929b9f..fd78c40ba3c 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1831,31 +1831,29 @@ two-component vectors with doubled precision in each component.
.. math::
dst.xy = (src.xy > 0) ? 1.0 : (src.xy < 0) ? -1.0 : 0.0
dst.zw = (src.zw > 0) ? 1.0 : (src.zw < 0) ? -1.0 : 0.0
.. opcode:: DFRACEXP - Convert Number to Fractional and Integral Components
Like the ``frexp()`` routine in many math libraries, this opcode stores the
exponent of its source to ``dst0``, and the significand to ``dst1``, such that
-:math:`dst1 \times 2^{dst0} = src` .
+:math:`dst1 \times 2^{dst0} = src` . The results are replicated across
+channels.
.. math::
- dst0.xy = exp(src.xy)
+ dst0.xy = dst0.zw = frac(src.xy)
- dst1.xy = frac(src.xy)
+ dst1 = exp(src.xy)
- dst0.zw = exp(src.zw)
-
- dst1.zw = frac(src.zw)
.. opcode:: DLDEXP - Multiply Number by Integral Power of 2
This opcode is the inverse of :opcode:`DFRACEXP`. The second
source is an integer.
.. math::
dst.xy = src0.xy \times 2^{src1.x}
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c9c922fc02b..188fbc9d47d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4038,50 +4038,52 @@ static int tgsi_dneg(struct r600_shader_ctx *ctx)
return 0;
}
static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
unsigned write_mask = inst->Dst[0].Register.WriteMask;
int i, j, r;
- int firsti = write_mask == 0xc ? 2 : 0;
for (i = 0; i <= 3; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ctx->inst_info->op;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
alu.dst.write = 1;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
}
if (i == 3)
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- /* MOV first two channels to writemask dst0 */
- for (i = 0; i <= 1; i++) {
+ /* Replicate significand result across channels. */
+ for (i = 0; i <= 3; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
- alu.src[0].chan = i + 2;
+ alu.src[0].chan = (i & 1) + 2;
alu.src[0].sel = ctx->temp_reg;
- tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}
for (i = 0; i <= 3; i++) {
if (inst->Dst[1].Register.WriteMask & (1 << i)) {
/* MOV third channels to writemask dst1 */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
--
2.11.0
More information about the mesa-dev
mailing list