[Mesa-dev] [PATCH v2 3/6] tgsi/softpipe: prepare ARB_gpu_shader_int64 support. (v3)
Nicolai Hähnle
nhaehnle at gmail.com
Mon Sep 19 13:08:32 UTC 2016
From: Dave Airlie <airlied at redhat.com>
This adds all the opcodes to tgsi_exec for softpipe to use.
v2: add conversion opcodes.
v3:
- no PIPE_CAP_INT64 yet
- change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
src/gallium/auxiliary/tgsi/tgsi_exec.c | 673 ++++++++++++++++++++++++++-------
1 file changed, 541 insertions(+), 132 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 37f3fc7..7b5c56d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -687,25 +687,265 @@ micro_trunc(union tgsi_exec_channel *dst,
static void
micro_u2d(union tgsi_double_channel *dst,
const union tgsi_exec_channel *src)
{
dst->d[0] = (double)src->u[0];
dst->d[1] = (double)src->u[1];
dst->d[2] = (double)src->u[2];
dst->d[3] = (double)src->u[3];
}
+static void
+micro_i64abs(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
+ dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
+ dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
+ dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
+}
+
+static void
+micro_i64sgn(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
+ dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
+ dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
+ dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
+}
+
+static void
+micro_i64neg(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = -src->i64[0];
+ dst->i64[1] = -src->i64[1];
+ dst->i64[2] = -src->i64[2];
+ dst->i64[3] = -src->i64[3];
+}
+
+static void
+micro_u64seq(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sne(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64slt(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64slt(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64sge(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sge(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
+ dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
+ dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
+ dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64max(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64max(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+ dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+ dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+ dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
+}
+
+static void
+micro_u64min(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64min(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+ dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+ dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+ dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
+}
+
+static void
+micro_u64add(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] + src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] + src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] + src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] + src[1].u64[3];
+}
+
+static void
+micro_u64mul(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] * src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] * src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] * src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] * src[1].u64[3];
+}
+
+static void
+micro_u64div(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] / src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] / src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] / src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] / src[1].u64[3];
+}
+
+static void
+micro_i64div(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src[0].i64[0] / src[1].i64[0];
+ dst->i64[1] = src[0].i64[1] / src[1].i64[1];
+ dst->i64[2] = src[0].i64[2] / src[1].i64[2];
+ dst->i64[3] = src[0].i64[3] / src[1].i64[3];
+}
+
+static void
+micro_u64mod(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = src[0].u64[0] % src[1].u64[0];
+ dst->u64[1] = src[0].u64[1] % src[1].u64[1];
+ dst->u64[2] = src[0].u64[2] % src[1].u64[2];
+ dst->u64[3] = src[0].u64[3] % src[1].u64[3];
+}
+
+static void
+micro_i64mod(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = src[0].i64[0] % src[1].i64[0];
+ dst->i64[1] = src[0].i64[1] % src[1].i64[1];
+ dst->i64[2] = src[0].i64[2] % src[1].i64[2];
+ dst->i64[3] = src[0].i64[3] % src[1].i64[3];
+}
+
+static void
+micro_u64shl(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src0,
+ union tgsi_exec_channel *src1)
+{
+ unsigned masked_count;
+ masked_count = src1->u[0] & 0x3f;
+ dst->u64[0] = src0->u64[0] << masked_count;
+ masked_count = src1->u[1] & 0x3f;
+ dst->u64[1] = src0->u64[1] << masked_count;
+ masked_count = src1->u[2] & 0x3f;
+ dst->u64[2] = src0->u64[2] << masked_count;
+ masked_count = src1->u[3] & 0x3f;
+ dst->u64[3] = src0->u64[3] << masked_count;
+}
+
+static void
+micro_i64shr(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src0,
+ union tgsi_exec_channel *src1)
+{
+ unsigned masked_count;
+ masked_count = src1->u[0] & 0x3f;
+ dst->i64[0] = src0->i64[0] >> masked_count;
+ masked_count = src1->u[1] & 0x3f;
+ dst->i64[1] = src0->i64[1] >> masked_count;
+ masked_count = src1->u[2] & 0x3f;
+ dst->i64[2] = src0->i64[2] >> masked_count;
+ masked_count = src1->u[3] & 0x3f;
+ dst->i64[3] = src0->i64[3] >> masked_count;
+}
+
+static void
+micro_u64shr(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src0,
+ union tgsi_exec_channel *src1)
+{
+ unsigned masked_count;
+ masked_count = src1->u[0] & 0x3f;
+ dst->u64[0] = src0->u64[0] >> masked_count;
+ masked_count = src1->u[1] & 0x3f;
+ dst->u64[1] = src0->u64[1] >> masked_count;
+ masked_count = src1->u[2] & 0x3f;
+ dst->u64[2] = src0->u64[2] >> masked_count;
+ masked_count = src1->u[3] & 0x3f;
+ dst->u64[3] = src0->u64[3] >> masked_count;
+}
+
enum tgsi_exec_datatype {
TGSI_EXEC_DATA_FLOAT,
TGSI_EXEC_DATA_INT,
TGSI_EXEC_DATA_UINT,
- TGSI_EXEC_DATA_DOUBLE
+ TGSI_EXEC_DATA_DOUBLE,
+ TGSI_EXEC_DATA_INT64,
+ TGSI_EXEC_DATA_UINT64,
};
/*
* Shorthand locations of various utility registers (_I = Index, _C = Channel)
*/
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
@@ -3391,20 +3631,30 @@ exec_endswitch(struct tgsi_exec_machine *mach)
{
mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
UPDATE_EXEC_MASK(mach);
}
typedef void (* micro_dop)(union tgsi_double_channel *dst,
const union tgsi_double_channel *src);
+typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src0,
+ union tgsi_exec_channel *src1);
+
+typedef void (* micro_dop_s)(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src);
+
+typedef void (* micro_sop_d)(union tgsi_exec_channel *dst,
+ const union tgsi_double_channel *src);
+
static void
fetch_double_channel(struct tgsi_exec_machine *mach,
union tgsi_double_channel *chan,
const struct tgsi_full_src_register *reg,
uint chan_0,
uint chan_1)
{
union tgsi_exec_channel src[2];
uint i;
@@ -3543,145 +3793,20 @@ exec_double_trinary(struct tgsi_exec_machine *mach,
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
op(&dst, src);
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
}
}
static void
-exec_f2d(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- union tgsi_exec_channel src;
- union tgsi_double_channel dst;
-
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- micro_f2d(&dst, &src);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
- }
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- micro_f2d(&dst, &src);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
- }
-}
-
-static void
-exec_d2f(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- union tgsi_double_channel src;
- union tgsi_exec_channel dst;
- int wm = inst->Dst[0].Register.WriteMask;
- int i;
- int bit;
- for (i = 0; i < 2; i++) {
- bit = ffs(wm);
- if (bit) {
- wm &= ~(1 << (bit - 1));
- if (i == 0)
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- else
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_d2f(&dst, &src);
- store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_FLOAT);
- }
- }
-}
-
-static void
-exec_i2d(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- union tgsi_exec_channel src;
- union tgsi_double_channel dst;
-
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
- micro_i2d(&dst, &src);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
- }
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_INT);
- micro_i2d(&dst, &src);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
- }
-}
-
-static void
-exec_d2i(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- union tgsi_double_channel src;
- union tgsi_exec_channel dst;
- int wm = inst->Dst[0].Register.WriteMask;
- int i;
- int bit;
- for (i = 0; i < 2; i++) {
- bit = ffs(wm);
- if (bit) {
- wm &= ~(1 << (bit - 1));
- if (i == 0)
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- else
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_d2i(&dst, &src);
- store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_INT);
- }
- }
-}
-static void
-exec_u2d(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- union tgsi_exec_channel src;
- union tgsi_double_channel dst;
-
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
- micro_u2d(&dst, &src);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
- }
- if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
- fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_UINT);
- micro_u2d(&dst, &src);
- store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
- }
-}
-
-static void
-exec_d2u(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- union tgsi_double_channel src;
- union tgsi_exec_channel dst;
- int wm = inst->Dst[0].Register.WriteMask;
- int i;
- int bit;
- for (i = 0; i < 2; i++) {
- bit = ffs(wm);
- if (bit) {
- wm &= ~(1 << (bit - 1));
- if (i == 0)
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- else
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_d2u(&dst, &src);
- store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_UINT);
- }
- }
-}
-
-static void
exec_dldexp(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
union tgsi_double_channel src0;
union tgsi_exec_channel src1;
union tgsi_double_channel dst;
int wmask;
wmask = inst->Dst[0].Register.WriteMask;
if (wmask & TGSI_WRITEMASK_XY) {
@@ -3714,20 +3839,46 @@ exec_dfracexp(struct tgsi_exec_machine *mach,
store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
}
if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
micro_dfracexp(&dst, &dst_exp, &src);
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
}
}
+static void
+exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_dop_sop op)
+{
+ union tgsi_double_channel src0;
+ union tgsi_exec_channel src1;
+ union tgsi_double_channel dst;
+ int wmask;
+
+ wmask = inst->Dst[0].Register.WriteMask;
+ if (wmask & TGSI_WRITEMASK_XY) {
+ fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+ fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
+ op(&dst, &src0, &src1);
+ store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+ }
+
+ if (wmask & TGSI_WRITEMASK_ZW) {
+ fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+ fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
+ op(&dst, &src0, &src1);
+ store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+ }
+}
+
static int
get_image_coord_dim(unsigned tgsi_tex)
{
int dim;
switch (tgsi_tex) {
case TGSI_TEXTURE_BUFFER:
case TGSI_TEXTURE_1D:
dim = 1;
break;
case TGSI_TEXTURE_2D:
@@ -4297,20 +4448,166 @@ static void
exec_resq(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
exec_resq_img(mach, inst);
else
exec_resq_buf(mach, inst);
}
static void
+micro_f2u64(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u64[0] = (uint64_t)src->f[0];
+ dst->u64[1] = (uint64_t)src->f[1];
+ dst->u64[2] = (uint64_t)src->f[2];
+ dst->u64[3] = (uint64_t)src->f[3];
+}
+
+static void
+micro_f2i64(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i64[0] = (int64_t)src->f[0];
+ dst->i64[1] = (int64_t)src->f[1];
+ dst->i64[2] = (int64_t)src->f[2];
+ dst->i64[3] = (int64_t)src->f[3];
+}
+
+static void
+micro_u2i64(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->u64[0] = (uint64_t)src->u[0];
+ dst->u64[1] = (uint64_t)src->u[1];
+ dst->u64[2] = (uint64_t)src->u[2];
+ dst->u64[3] = (uint64_t)src->u[3];
+}
+
+static void
+micro_i2i64(union tgsi_double_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->i64[0] = (int64_t)src->i[0];
+ dst->i64[1] = (int64_t)src->i[1];
+ dst->i64[2] = (int64_t)src->i[2];
+ dst->i64[3] = (int64_t)src->i[3];
+}
+
+static void
+micro_d2u64(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->u64[0] = (uint64_t)src->d[0];
+ dst->u64[1] = (uint64_t)src->d[1];
+ dst->u64[2] = (uint64_t)src->d[2];
+ dst->u64[3] = (uint64_t)src->d[3];
+}
+
+static void
+micro_d2i64(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->i64[0] = (int64_t)src->d[0];
+ dst->i64[1] = (int64_t)src->d[1];
+ dst->i64[2] = (int64_t)src->d[2];
+ dst->i64[3] = (int64_t)src->d[3];
+}
+
+static void
+micro_u642d(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->d[0] = (double)src->u64[0];
+ dst->d[1] = (double)src->u64[1];
+ dst->d[2] = (double)src->u64[2];
+ dst->d[3] = (double)src->u64[3];
+}
+
+static void
+micro_i642d(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->d[0] = (double)src->i64[0];
+ dst->d[1] = (double)src->i64[1];
+ dst->d[2] = (double)src->i64[2];
+ dst->d[3] = (double)src->i64[3];
+}
+
+static void
+micro_u642f(union tgsi_exec_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->f[0] = (float)src->u64[0];
+ dst->f[1] = (float)src->u64[1];
+ dst->f[2] = (float)src->u64[2];
+ dst->f[3] = (float)src->u64[3];
+}
+
+static void
+micro_i642f(union tgsi_exec_channel *dst,
+ const union tgsi_double_channel *src)
+{
+ dst->f[0] = (float)src->i64[0];
+ dst->f[1] = (float)src->i64[1];
+ dst->f[2] = (float)src->i64[2];
+ dst->f[3] = (float)src->i64[3];
+}
+
+static void
+exec_t_2_64(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_dop_s op,
+ enum tgsi_exec_datatype src_datatype)
+{
+ union tgsi_exec_channel src;
+ union tgsi_double_channel dst;
+
+ if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
+ fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
+ op(&dst, &src);
+ store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+ }
+ if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
+ fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
+ op(&dst, &src);
+ store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+ }
+}
+
+static void
+exec_64_2_t(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ micro_sop_d op,
+ enum tgsi_exec_datatype dst_datatype)
+{
+ union tgsi_double_channel src;
+ union tgsi_exec_channel dst;
+ int wm = inst->Dst[0].Register.WriteMask;
+ int i;
+ int bit;
+ for (i = 0; i < 2; i++) {
+ bit = ffs(wm);
+ if (bit) {
+ wm &= ~(1 << (bit - 1));
+ if (i == 0)
+ fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+ else
+ fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+ op(&dst, &src);
+ store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
+ }
+ }
+}
+
+static void
micro_i2f(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
dst->f[0] = (float)src->i[0];
dst->f[1] = (float)src->i[1];
dst->f[2] = (float)src->i[2];
dst->f[3] = (float)src->i[3];
}
static void
@@ -5680,25 +5977,25 @@ exec_instruction(
exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_IMSB:
exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
break;
case TGSI_OPCODE_UMSB:
exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_F2D:
- exec_f2d(mach, inst);
+ exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_D2F:
- exec_d2f(mach, inst);
+ exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DABS:
exec_double_unary(mach, inst, micro_dabs);
break;
case TGSI_OPCODE_DNEG:
exec_double_unary(mach, inst, micro_dneg);
break;
@@ -5756,33 +6053,33 @@ exec_instruction(
case TGSI_OPCODE_DLDEXP:
exec_dldexp(mach, inst);
break;
case TGSI_OPCODE_DFRACEXP:
exec_dfracexp(mach, inst);
break;
case TGSI_OPCODE_I2D:
- exec_i2d(mach, inst);
+ exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
break;
case TGSI_OPCODE_D2I:
- exec_d2i(mach, inst);
+ exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
break;
case TGSI_OPCODE_U2D:
- exec_u2d(mach, inst);
+ exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
break;
case TGSI_OPCODE_D2U:
- exec_d2u(mach, inst);
+ exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT);
break;
case TGSI_OPCODE_LOAD:
exec_load(mach, inst);
break;
case TGSI_OPCODE_STORE:
exec_store(mach, inst);
break;
@@ -5799,20 +6096,132 @@ exec_instruction(
exec_atomop(mach, inst);
break;
case TGSI_OPCODE_RESQ:
exec_resq(mach, inst);
break;
case TGSI_OPCODE_BARRIER:
case TGSI_OPCODE_MEMBAR:
return TRUE;
break;
+
+ case TGSI_OPCODE_I64ABS:
+ exec_double_unary(mach, inst, micro_i64abs);
+ break;
+
+ case TGSI_OPCODE_I64SSG:
+ exec_double_unary(mach, inst, micro_i64sgn);
+ break;
+
+ case TGSI_OPCODE_I64NEG:
+ exec_double_unary(mach, inst, micro_i64neg);
+ break;
+
+ case TGSI_OPCODE_U64SEQ:
+ exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_U64SNE:
+ exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_I64SLT:
+ exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
+ break;
+ case TGSI_OPCODE_U64SLT:
+ exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_I64SGE:
+ exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
+ break;
+ case TGSI_OPCODE_U64SGE:
+ exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
+ break;
+
+ case TGSI_OPCODE_I64MIN:
+ exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
+ break;
+ case TGSI_OPCODE_U64MIN:
+ exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_I64MAX:
+ exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
+ break;
+ case TGSI_OPCODE_U64MAX:
+ exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_U64ADD:
+ exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_U64MUL:
+ exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_U64SHL:
+ exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
+ break;
+ case TGSI_OPCODE_I64SHR:
+ exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
+ break;
+ case TGSI_OPCODE_U64SHR:
+ exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
+ break;
+ case TGSI_OPCODE_U64DIV:
+ exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_I64DIV:
+ exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
+ break;
+ case TGSI_OPCODE_U64MOD:
+ exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
+ break;
+ case TGSI_OPCODE_I64MOD:
+ exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
+ break;
+
+ case TGSI_OPCODE_F2U64:
+ exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_F2I64:
+ exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_U2I64:
+ exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
+ break;
+ case TGSI_OPCODE_I2I64:
+ exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
+ break;
+
+ case TGSI_OPCODE_D2U64:
+ exec_double_unary(mach, inst, micro_d2u64);
+ break;
+
+ case TGSI_OPCODE_D2I64:
+ exec_double_unary(mach, inst, micro_d2i64);
+ break;
+
+ case TGSI_OPCODE_U642F:
+ exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
+ break;
+ case TGSI_OPCODE_I642F:
+ exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
+ break;
+
+ case TGSI_OPCODE_U642D:
+ exec_double_unary(mach, inst, micro_u642d);
+ break;
+ case TGSI_OPCODE_I642D:
+ exec_double_unary(mach, inst, micro_i642d);
+ break;
+
default:
assert( 0 );
}
return FALSE;
}
static void
tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
{
uint default_mask = 0xf;
--
2.7.4
More information about the mesa-dev
mailing list