[Mesa-dev] [PATCH 25/27] tgsi/softpipe: enable ARB_gpu_shader_int64 support. (v2)

Dave Airlie airlied at gmail.com
Mon Jun 20 05:07:11 UTC 2016


From: Dave Airlie <airlied at redhat.com>

This adds all the opcodes to tgsi_exec for softpipe to use.

It also enables the cap.

v2: add conversion opcodes.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c   | 673 +++++++++++++++++++++++++------
 src/gallium/drivers/softpipe/sp_screen.c |   2 +
 2 files changed, 543 insertions(+), 132 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index e99caeb..ef3c077 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -694,11 +694,251 @@ micro_u2d(union tgsi_double_channel *dst,
    dst->d[3] = (double)src->u[3];
 }
 
+/* Per-lane absolute value of four signed 64-bit ints; sign test is an integer compare. */
+static void
+micro_i64abs(union tgsi_double_channel *dst, const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src->i64[0] >= 0 ? src->i64[0] : -src->i64[0];
+   dst->i64[1] = src->i64[1] >= 0 ? src->i64[1] : -src->i64[1];
+   dst->i64[2] = src->i64[2] >= 0 ? src->i64[2] : -src->i64[2];
+   dst->i64[3] = src->i64[3] >= 0 ? src->i64[3] : -src->i64[3];
+}
+
+static void
+micro_i64sgn(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
+   dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
+   dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
+   dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
+}
+
+static void
+micro_i64neg(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = -src->i64[0];
+   dst->i64[1] = -src->i64[1];
+   dst->i64[2] = -src->i64[2];
+   dst->i64[3] = -src->i64[3];
+}
+
+static void
+micro_u64seq(union tgsi_double_channel *dst,
+           const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sne(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64slt(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64slt(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64sge(union tgsi_double_channel *dst,
+           const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sge(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64max(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64max(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+   dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+   dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+   dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
+}
+
+static void
+micro_u64min(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64min(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+   dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+   dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+   dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
+}
+
+static void
+micro_u64add(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] + src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] + src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] + src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] + src[1].u64[3];
+}
+
+static void
+micro_u64mul(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] * src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] * src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] * src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] * src[1].u64[3];
+}
+
+/* Per-lane unsigned 64-bit src[0] / src[1]; a zero divisor yields all-ones, not a trap. */
+static void
+micro_u64div(union tgsi_double_channel *dst, const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
+}
+
+/* Per-lane signed 64-bit src[0] / src[1]; a zero divisor yields 0 instead of trapping. */
+static void
+micro_i64div(union tgsi_double_channel *dst, const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
+}
+
+/* Per-lane unsigned 64-bit src[0] % src[1]; a zero divisor yields all-ones, not a trap. */
+static void
+micro_u64mod(union tgsi_double_channel *dst, const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
+}
+
+/* Per-lane signed 64-bit src[0] % src[1]; a zero divisor yields all-ones (-1), not a trap. */
+static void
+micro_i64mod(union tgsi_double_channel *dst, const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
+}
+
+static void
+micro_u64shl(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src0,
+             union tgsi_exec_channel *src1)
+{
+   unsigned masked_count;
+   masked_count = src1->u[0] & 0x3f;
+   dst->u64[0] = src0->u64[0] << masked_count;
+   masked_count = src1->u[1] & 0x3f;
+   dst->u64[1] = src0->u64[1] << masked_count;
+   masked_count = src1->u[2] & 0x3f;
+   dst->u64[2] = src0->u64[2] << masked_count;
+   masked_count = src1->u[3] & 0x3f;
+   dst->u64[3] = src0->u64[3] << masked_count;
+}
+
+static void
+micro_i64shr(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src0,
+             union tgsi_exec_channel *src1)
+{
+   unsigned masked_count;
+   masked_count = src1->u[0] & 0x3f;
+   dst->i64[0] = src0->i64[0] >> masked_count;
+   masked_count = src1->u[1] & 0x3f;
+   dst->i64[1] = src0->i64[1] >> masked_count;
+   masked_count = src1->u[2] & 0x3f;
+   dst->i64[2] = src0->i64[2] >> masked_count;
+   masked_count = src1->u[3] & 0x3f;
+   dst->i64[3] = src0->i64[3] >> masked_count;
+}
+
+static void
+micro_u64shr(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src0,
+             union tgsi_exec_channel *src1)
+{
+   unsigned masked_count;
+   masked_count = src1->u[0] & 0x3f;
+   dst->u64[0] = src0->u64[0] >> masked_count;
+   masked_count = src1->u[1] & 0x3f;
+   dst->u64[1] = src0->u64[1] >> masked_count;
+   masked_count = src1->u[2] & 0x3f;
+   dst->u64[2] = src0->u64[2] >> masked_count;
+   masked_count = src1->u[3] & 0x3f;
+   dst->u64[3] = src0->u64[3] >> masked_count;
+}
+
 enum tgsi_exec_datatype {
    TGSI_EXEC_DATA_FLOAT,
    TGSI_EXEC_DATA_INT,
    TGSI_EXEC_DATA_UINT,
-   TGSI_EXEC_DATA_DOUBLE
+   TGSI_EXEC_DATA_DOUBLE,
+   TGSI_EXEC_DATA_INT64,
+   TGSI_EXEC_DATA_UINT64,
 };
 
 /*
@@ -3398,6 +3638,16 @@ exec_endswitch(struct tgsi_exec_machine *mach)
 typedef void (* micro_dop)(union tgsi_double_channel *dst,
                            const union tgsi_double_channel *src);
 
+typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,
+                               const union tgsi_double_channel *src0,
+                               union tgsi_exec_channel *src1);
+
+typedef void (* micro_dop_s)(union tgsi_double_channel *dst,
+                             const union tgsi_exec_channel *src);
+
+typedef void (* micro_sop_d)(union tgsi_exec_channel *dst,
+                             const union tgsi_double_channel *src);
+
 static void
 fetch_double_channel(struct tgsi_exec_machine *mach,
                      union tgsi_double_channel *chan,
@@ -3550,131 +3800,6 @@ exec_double_trinary(struct tgsi_exec_machine *mach,
 }
 
 static void
-exec_f2d(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel src;
-   union tgsi_double_channel dst;
-
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-      micro_f2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
-   }
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-      micro_f2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
-   }
-}
-
-static void
-exec_d2f(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_double_channel src;
-   union tgsi_exec_channel dst;
-   int wm = inst->Dst[0].Register.WriteMask;
-   int i;
-   int bit;
-   for (i = 0; i < 2; i++) {
-      bit = ffs(wm);
-      if (bit) {
-         wm &= ~(1 << (bit - 1));
-         if (i == 0)
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
-         else
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
-         micro_d2f(&dst, &src);
-         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_FLOAT);
-      }
-   }
-}
-
-static void
-exec_i2d(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel src;
-   union tgsi_double_channel dst;
-
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
-      micro_i2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
-   }
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_INT);
-      micro_i2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
-   }
-}
-
-static void
-exec_d2i(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_double_channel src;
-   union tgsi_exec_channel dst;
-   int wm = inst->Dst[0].Register.WriteMask;
-   int i;
-   int bit;
-   for (i = 0; i < 2; i++) {
-      bit = ffs(wm);
-      if (bit) {
-         wm &= ~(1 << (bit - 1));
-         if (i == 0)
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
-         else
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
-         micro_d2i(&dst, &src);
-         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_INT);
-      }
-   }
-}
-static void
-exec_u2d(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel src;
-   union tgsi_double_channel dst;
-
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
-      micro_u2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
-   }
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_UINT);
-      micro_u2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
-   }
-}
-
-static void
-exec_d2u(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_double_channel src;
-   union tgsi_exec_channel dst;
-   int wm = inst->Dst[0].Register.WriteMask;
-   int i;
-   int bit;
-   for (i = 0; i < 2; i++) {
-      bit = ffs(wm);
-      if (bit) {
-         wm &= ~(1 << (bit - 1));
-         if (i == 0)
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
-         else
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
-         micro_d2u(&dst, &src);
-         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_UINT);
-      }
-   }
-}
-
-static void
 exec_dldexp(struct tgsi_exec_machine *mach,
             const struct tgsi_full_instruction *inst)
 {
@@ -3721,6 +3846,32 @@ exec_dfracexp(struct tgsi_exec_machine *mach,
    }
 }
 
+static void
+exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
+            const struct tgsi_full_instruction *inst,
+            micro_dop_sop op)
+{
+   union tgsi_double_channel src0;
+   union tgsi_exec_channel src1;
+   union tgsi_double_channel dst;
+   int wmask;
+
+   wmask = inst->Dst[0].Register.WriteMask;
+   if (wmask & TGSI_WRITEMASK_XY) {
+      fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+      fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
+      op(&dst, &src0, &src1);
+      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+   }
+
+   if (wmask & TGSI_WRITEMASK_ZW) {
+      fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+      fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
+      op(&dst, &src0, &src1);
+      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+   }
+}
+
 static int
 get_image_coord_dim(unsigned tgsi_tex)
 {
@@ -4304,6 +4455,152 @@ exec_resq(struct tgsi_exec_machine *mach,
 }
 
 static void
+micro_f2u64(union tgsi_double_channel *dst,
+            const union tgsi_exec_channel *src)
+{
+   dst->u64[0] = (uint64_t)src->f[0];
+   dst->u64[1] = (uint64_t)src->f[1];
+   dst->u64[2] = (uint64_t)src->f[2];
+   dst->u64[3] = (uint64_t)src->f[3];
+}
+
+static void
+micro_f2i64(union tgsi_double_channel *dst,
+            const union tgsi_exec_channel *src)
+{
+   dst->i64[0] = (int64_t)src->f[0];
+   dst->i64[1] = (int64_t)src->f[1];
+   dst->i64[2] = (int64_t)src->f[2];
+   dst->i64[3] = (int64_t)src->f[3];
+}
+
+static void
+micro_i2u64(union tgsi_double_channel *dst,
+            const union tgsi_exec_channel *src)
+{
+   dst->u64[0] = (uint64_t)src->i[0];
+   dst->u64[1] = (uint64_t)src->i[1];
+   dst->u64[2] = (uint64_t)src->i[2];
+   dst->u64[3] = (uint64_t)src->i[3];
+}
+
+static void
+micro_i2i64(union tgsi_double_channel *dst,
+            const union tgsi_exec_channel *src)
+{
+   dst->i64[0] = (int64_t)src->i[0];
+   dst->i64[1] = (int64_t)src->i[1];
+   dst->i64[2] = (int64_t)src->i[2];
+   dst->i64[3] = (int64_t)src->i[3];
+}
+
+static void
+micro_d2u64(union tgsi_double_channel *dst,
+           const union tgsi_double_channel *src)
+{
+   dst->u64[0] = (uint64_t)src->d[0];
+   dst->u64[1] = (uint64_t)src->d[1];
+   dst->u64[2] = (uint64_t)src->d[2];
+   dst->u64[3] = (uint64_t)src->d[3];
+}
+
+static void
+micro_d2i64(union tgsi_double_channel *dst,
+           const union tgsi_double_channel *src)
+{
+   dst->i64[0] = (int64_t)src->d[0];
+   dst->i64[1] = (int64_t)src->d[1];
+   dst->i64[2] = (int64_t)src->d[2];
+   dst->i64[3] = (int64_t)src->d[3];
+}
+
+static void
+micro_u642d(union tgsi_double_channel *dst,
+           const union tgsi_double_channel *src)
+{
+   dst->d[0] = (double)src->u64[0];
+   dst->d[1] = (double)src->u64[1];
+   dst->d[2] = (double)src->u64[2];
+   dst->d[3] = (double)src->u64[3];
+}
+
+static void
+micro_i642d(union tgsi_double_channel *dst,
+           const union tgsi_double_channel *src)
+{
+   dst->d[0] = (double)src->i64[0];
+   dst->d[1] = (double)src->i64[1];
+   dst->d[2] = (double)src->i64[2];
+   dst->d[3] = (double)src->i64[3];
+}
+
+static void
+micro_u642f(union tgsi_exec_channel *dst,
+            const union tgsi_double_channel *src)
+{
+   dst->f[0] = (float)src->u64[0];
+   dst->f[1] = (float)src->u64[1];
+   dst->f[2] = (float)src->u64[2];
+   dst->f[3] = (float)src->u64[3];
+}
+
+static void
+micro_i642f(union tgsi_exec_channel *dst,
+            const union tgsi_double_channel *src)
+{
+   dst->f[0] = (float)src->i64[0];
+   dst->f[1] = (float)src->i64[1];
+   dst->f[2] = (float)src->i64[2];
+   dst->f[3] = (float)src->i64[3];
+}
+
+static void
+exec_t_2_64(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst,
+          micro_dop_s op,
+          enum tgsi_exec_datatype src_datatype)
+{
+   union tgsi_exec_channel src;
+   union tgsi_double_channel dst;
+
+   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
+      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
+      op(&dst, &src);
+      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+   }
+   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
+      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
+      op(&dst, &src);
+      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+   }
+}
+
+static void
+exec_64_2_t(struct tgsi_exec_machine *mach,
+            const struct tgsi_full_instruction *inst,
+            micro_sop_d op,
+            enum tgsi_exec_datatype dst_datatype)
+{
+   union tgsi_double_channel src;
+   union tgsi_exec_channel dst;
+   int wm = inst->Dst[0].Register.WriteMask;
+   int i;
+   int bit;
+   for (i = 0; i < 2; i++) {
+      bit = ffs(wm);
+      if (bit) {
+         wm &= ~(1 << (bit - 1));
+         if (i == 0)
+            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+         else
+            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+         op(&dst, &src);
+         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
+      }
+   }
+}
+
+static void
 micro_i2f(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
 {
@@ -5683,11 +5980,11 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_F2D:
-      exec_f2d(mach, inst);
+      exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_D2F:
-      exec_d2f(mach, inst);
+      exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_DABS:
@@ -5759,19 +6056,19 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_I2D:
-      exec_i2d(mach, inst);
+      exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
       break;
 
    case TGSI_OPCODE_D2I:
-      exec_d2i(mach, inst);
+      exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
       break;
 
    case TGSI_OPCODE_U2D:
-      exec_u2d(mach, inst);
+      exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
       break;
 
    case TGSI_OPCODE_D2U:
-      exec_d2u(mach, inst);
+      exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_UINT);
       break;
 
    case TGSI_OPCODE_LOAD:
@@ -5802,6 +6099,118 @@ exec_instruction(
    case TGSI_OPCODE_MEMBAR:
       return TRUE;
       break;
+
+   case TGSI_OPCODE_I64ABS:
+      exec_double_unary(mach, inst, micro_i64abs);
+      break;
+
+   case TGSI_OPCODE_I64SSG:
+      exec_double_unary(mach, inst, micro_i64sgn);
+      break;
+
+   case TGSI_OPCODE_I64NEG:
+      exec_double_unary(mach, inst, micro_i64neg);
+      break;
+
+   case TGSI_OPCODE_U64SEQ:
+      exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_U64SNE:
+      exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_I64SLT:
+      exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_U64SLT:
+      exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_I64SGE:
+      exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_U64SGE:
+      exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_I64MIN:
+      exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
+      break;
+   case TGSI_OPCODE_U64MIN:
+      exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_I64MAX:
+      exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
+      break;
+   case TGSI_OPCODE_U64MAX:
+      exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_U64ADD:
+      exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_U64MUL:
+      exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_U64SHL:
+      exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
+      break;
+   case TGSI_OPCODE_I64SHR:
+      exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
+      break;
+   case TGSI_OPCODE_U64SHR:
+      exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
+      break;
+   case TGSI_OPCODE_U64DIV:
+      exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_I64DIV:
+      exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
+      break;
+   case TGSI_OPCODE_U64MOD:
+      exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_I64MOD:
+      exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
+      break;
+
+   case TGSI_OPCODE_F2U64:
+      exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
+      break;
+
+   case TGSI_OPCODE_F2I64:
+      exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
+      break;
+
+   case TGSI_OPCODE_I2U64:
+      exec_t_2_64(mach, inst, micro_i2u64, TGSI_EXEC_DATA_INT);
+      break;
+   case TGSI_OPCODE_I2I64:
+      exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
+      break;
+
+   case TGSI_OPCODE_D2U64:
+      exec_double_unary(mach, inst, micro_d2u64);
+      break;
+
+   case TGSI_OPCODE_D2I64:
+      exec_double_unary(mach, inst, micro_d2i64);
+      break;
+
+   case TGSI_OPCODE_U642F:
+      exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
+      break;
+   case TGSI_OPCODE_I642F:
+      exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
+      break;
+
+   case TGSI_OPCODE_U642D:
+      exec_double_unary(mach, inst, micro_u642d);
+      break;
+   case TGSI_OPCODE_I642D:
+      exec_double_unary(mach, inst, micro_i642d);
+      break;
+
    default:
       assert( 0 );
    }
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index a4b2299..a8e9af6 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -280,6 +280,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 0;
    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
       return 4;
+   case PIPE_CAP_INT64:
+      return 1;
    }
    /* should only get here on unhandled cases */
    debug_printf("Unexpected PIPE_CAP %d query\n", param);
-- 
2.5.5



More information about the mesa-dev mailing list