[Mesa-dev] [PATCH 23/23] gallivm/llvmpipe: add support for ARB_gpu_shader_int64.
Dave Airlie
airlied at gmail.com
Thu Jun 9 00:48:24 UTC 2016
From: Dave Airlie <airlied at redhat.com>
This enables 64-bit integer support in gallivm and
llvmpipe.
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
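Notes (not part of the commit message):

The new u64div/u64mod/i64div/i64mod actions below reuse the divide-by-zero guard from the 32-bit paths: the divisor is compared against zero, the resulting all-ones mask is OR'd into the divisor so the division can never raise SIGFPE, and then OR'd into the result so a zero divisor yields all ones. A standalone scalar sketch of that trick in plain C (not gallivm code; the function name is made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* scalar analogue of the div_mask guard in u64div_emit_cpu() */
static uint64_t u64div_guarded(uint64_t a, uint64_t b)
{
   uint64_t div_mask = (b == 0) ? ~UINT64_C(0) : 0; /* lp_build_cmp(EQUAL, b, 0) analogue */
   uint64_t divisor  = b | div_mask;                /* never zero, so no fault */
   uint64_t result   = a / divisor;
   return result | div_mask;                        /* force all ones on div-by-zero */
}

int main(void)
{
   printf("%llu\n", (unsigned long long)u64div_guarded(100, 7)); /* 14 */
   printf("%llx\n", (unsigned long long)u64div_guarded(100, 0)); /* ffffffffffffffff */
   return 0;
}
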
src/gallium/auxiliary/gallivm/lp_bld_limits.h | 1 +
src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 8 +-
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 4 +
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 352 +++++++++++++++++++++
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 68 +++-
src/gallium/auxiliary/tgsi/tgsi_info.h | 9 +
6 files changed, 425 insertions(+), 17 deletions(-)
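As with doubles, 64-bit integers occupy two consecutive 32-bit channels in the SoA layout, which is why destination channels 1 and 3 are skipped for 64-bit types and why emit_fetch_double() is reused to combine the two halves. A standalone scalar sketch of that storage model (plain C, assuming a little-endian host; not gallivm code):

#include <stdint.h>
#include <stdio.h>

/* split a 64-bit value into the two 32-bit channel slots it occupies */
static void split64(uint64_t v, uint32_t *chan_lo, uint32_t *chan_hi)
{
   *chan_lo = (uint32_t)(v & 0xffffffffu); /* channel N   */
   *chan_hi = (uint32_t)(v >> 32);         /* channel N+1 */
}

/* recombine the two channel slots back into a 64-bit value */
static uint64_t fetch64(uint32_t chan_lo, uint32_t chan_hi)
{
   return (uint64_t)chan_lo | ((uint64_t)chan_hi << 32);
}

int main(void)
{
   uint32_t lo, hi;
   split64(UINT64_C(0x123456789abcdef0), &lo, &hi);
   printf("%llx\n", (unsigned long long)fetch64(lo, hi)); /* 123456789abcdef0 */
   return 0;
}
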
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 32addec..017c86f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -134,6 +134,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
case PIPE_SHADER_CAP_DOUBLES:
+ case PIPE_SHADER_CAP_INT64:
return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index 614c655..ceefe73 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -204,12 +204,12 @@ static int get_src_chan_idx(unsigned opcode,
enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(opcode);
enum tgsi_opcode_type stype = tgsi_opcode_infer_src_type(opcode);
- if (dtype != TGSI_TYPE_DOUBLE && stype != TGSI_TYPE_DOUBLE)
+ if (!tgsi_type_is64bit(dtype) && !tgsi_type_is64bit(stype))
return dst_chan_index;
- if (dtype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(dtype)) {
if (dst_chan_index == 1 || dst_chan_index == 3)
return -1;
- if (stype == TGSI_TYPE_DOUBLE)
+ if (tgsi_type_is64bit(stype))
return dst_chan_index;
if (dst_chan_index == 0)
return 0;
@@ -364,6 +364,8 @@ lp_build_emit_fetch(
break;
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
+ case TGSI_TYPE_UNSIGNED64:
+ case TGSI_TYPE_SIGNED64:
case TGSI_TYPE_VOID:
default:
/* abs modifier is only legal on floating point types */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index b9094dc..50866ad 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -337,6 +337,10 @@ struct lp_build_tgsi_context
struct lp_build_context int_bld;
struct lp_build_context dbl_bld;
+
+ struct lp_build_context uint64_bld;
+ struct lp_build_context int64_bld;
+
/** This array stores functions that are used to transform TGSI opcodes to
* LLVM instructions.
*/
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 43af6b4..c0e7338 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -2127,6 +2127,331 @@ dsqrt_emit_cpu(
emit_data->args[0]);
}
+static void
+i64abs_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int64_bld,
+ emit_data->args[0]);
+}
+
+static void
+i64ssg_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int64_bld,
+ emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_I64NEG (CPU Only) */
+static void
+i64neg_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int64_bld,
+ bld_base->int64_bld.zero,
+ emit_data->args[0]);
+}
+
+/* U64SEQ/U64SNE/U64SLT/U64SGE helper (CPU Only) */
+static void
+u64set_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data,
+ unsigned pipe_func)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef cond = lp_build_cmp(&bld_base->uint64_bld, pipe_func,
+ emit_data->args[0], emit_data->args[1]);
+ /* the comparison arguments were 64 bit, but the result is stored as 32 bit */
+ cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
+ emit_data->output[emit_data->chan] = cond;
+}
+
+static void
+u64seq_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
+}
+
+static void
+u64sne_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
+}
+
+static void
+u64slt_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
+}
+
+static void
+u64sge_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
+}
+
+static void
+i64set_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data,
+ unsigned pipe_func)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef cond = lp_build_cmp(&bld_base->int64_bld, pipe_func,
+ emit_data->args[0], emit_data->args[1]);
+ /* the comparison arguments were 64 bit, but the result is stored as 32 bit */
+ cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
+ emit_data->output[emit_data->chan] = cond;
+}
+
+static void
+i64slt_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
+}
+
+static void
+i64sge_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
+}
+
+static void
+u64max_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint64_bld,
+ emit_data->args[0], emit_data->args[1]);
+}
+
+/* TGSI_OPCODE_U64MIN (CPU Only) */
+static void
+u64min_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint64_bld,
+ emit_data->args[0], emit_data->args[1]);
+}
+
+static void
+i64max_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int64_bld,
+ emit_data->args[0], emit_data->args[1]);
+}
+
+/* TGSI_OPCODE_I64MIN (CPU Only) */
+static void
+i64min_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int64_bld,
+ emit_data->args[0], emit_data->args[1]);
+}
+
+/* TGSI_OPCODE_U64ADD (CPU Only) */
+static void
+u64add_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint64_bld,
+ emit_data->args[0], emit_data->args[1]);
+}
+
+/* TGSI_OPCODE_U64MUL (CPU Only) */
+static void
+u64mul_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint64_bld,
+ emit_data->args[0], emit_data->args[1]);
+}
+
+/* TGSI_OPCODE_U64SHL (CPU Only) */
+static void
+u64shl_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_context *uint_bld = &bld_base->uint64_bld;
+ LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
+ uint_bld->type.width - 1);
+ LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
+ emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
+ masked_count);
+}
+
+/* TGSI_OPCODE_I64SHR (CPU Only) */
+static void
+i64shr_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_context *int_bld = &bld_base->int64_bld;
+ LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
+ int_bld->type.width - 1);
+ LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
+ emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
+ masked_count);
+}
+
+/* TGSI_OPCODE_U64SHR (CPU Only) */
+static void
+u64shr_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_context *uint_bld = &bld_base->uint64_bld;
+ LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
+ uint_bld->type.width - 1);
+ LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
+ emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
+ masked_count);
+}
+
+/* TGSI_OPCODE_U64DIV (CPU Only) */
+static void
+u64div_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
+ PIPE_FUNC_EQUAL, emit_data->args[1],
+ bld_base->uint64_bld.zero);
+ /* We want to make sure that we never divide/mod by zero to not
+ * generate sigfpe. We don't want to crash just because the
+ * shader is doing something weird. */
+ LLVMValueRef divisor = LLVMBuildOr(builder,
+ div_mask,
+ emit_data->args[1], "");
+ LLVMValueRef result = lp_build_div(&bld_base->uint64_bld,
+ emit_data->args[0], divisor);
+ /* u64div by zero returns all ones (0xffffffffffffffff) here, mirroring the 32-bit d3d10 UDIV behaviour */
+ emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
+ div_mask,
+ result, "");
+}
+
+/* TGSI_OPCODE_U64MOD (CPU Only) */
+static void
+u64mod_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
+ PIPE_FUNC_EQUAL, emit_data->args[1],
+ bld_base->uint64_bld.zero);
+ /* We want to make sure that we never divide/mod by zero to not
+ * generate sigfpe. We don't want to crash just because the
+ * shader is doing something weird. */
+ LLVMValueRef divisor = LLVMBuildOr(builder,
+ div_mask,
+ emit_data->args[1], "");
+ LLVMValueRef result = lp_build_mod(&bld_base->uint64_bld,
+ emit_data->args[0], divisor);
+ /* u64mod by zero doesn't have a guaranteed return value; return all ones (~0) for now. */
+ emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
+ div_mask,
+ result, "");
+}
+
+static void
+i64div_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
+ PIPE_FUNC_EQUAL, emit_data->args[1],
+ bld_base->uint64_bld.zero);
+ /* We want to make sure that we never divide/mod by zero to not
+ * generate sigfpe. We don't want to crash just because the
+ * shader is doing something weird. */
+ LLVMValueRef divisor = LLVMBuildOr(builder,
+ div_mask,
+ emit_data->args[1], "");
+ LLVMValueRef result = lp_build_div(&bld_base->int64_bld,
+ emit_data->args[0], divisor);
+ /* i64div by zero returns all ones (-1) here, matching the unsigned path */
+ emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
+ div_mask,
+ result, "");
+}
+
+/* TGSI_OPCODE_I64MOD (CPU Only) */
+static void
+i64mod_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
+ PIPE_FUNC_EQUAL, emit_data->args[1],
+ bld_base->uint64_bld.zero);
+ /* We want to make sure that we never divide/mod by zero to not
+ * generate sigfpe. We don't want to crash just because the
+ * shader is doing something weird. */
+ LLVMValueRef divisor = LLVMBuildOr(builder,
+ div_mask,
+ emit_data->args[1], "");
+ LLVMValueRef result = lp_build_mod(&bld_base->int64_bld,
+ emit_data->args[0], divisor);
+ /* i64mod by zero doesn't have a guaranteed return value; return all ones (-1) for now. */
+ emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
+ div_mask,
+ result, "");
+}
+
void
lp_set_default_actions_cpu(
struct lp_build_tgsi_context * bld_base)
@@ -2209,4 +2534,31 @@ lp_set_default_actions_cpu(
bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = i64abs_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = i64ssg_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = i64neg_emit_cpu;
+
+ bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = u64seq_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = u64sne_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = u64slt_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = u64sge_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = i64slt_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = i64sge_emit_cpu;
+
+ bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = u64min_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = u64max_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = i64min_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = i64max_emit_cpu;
+
+ bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = u64add_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_U64MUL].emit = u64mul_emit_cpu;
+
+ bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = u64shl_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = i64shr_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = u64shr_emit_cpu;
+
+ bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = u64div_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = i64div_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = u64mod_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = i64mod_emit_cpu;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 31157a8..ecaab59 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1168,6 +1168,12 @@ stype_to_fetch(struct lp_build_tgsi_context * bld_base,
case TGSI_TYPE_DOUBLE:
bld_fetch = &bld_base->dbl_bld;
break;
+ case TGSI_TYPE_UNSIGNED64:
+ bld_fetch = &bld_base->uint64_bld;
+ break;
+ case TGSI_TYPE_SIGNED64:
+ bld_fetch = &bld_base->int64_bld;
+ break;
case TGSI_TYPE_VOID:
default:
assert(0);
@@ -1264,7 +1270,7 @@ emit_fetch_constant(
index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
- if (stype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(stype)) {
LLVMValueRef swizzle_vec2;
swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
@@ -1285,12 +1291,20 @@ emit_fetch_constant(
LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
bld_broad = &bld_base->dbl_bld;
+ } else if (stype == TGSI_TYPE_UNSIGNED64) {
+ LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
+ bld_broad = &bld_base->uint64_bld;
+ } else if (stype == TGSI_TYPE_SIGNED64) {
+ LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
+ bld_broad = &bld_base->int64_bld;
}
scalar = LLVMBuildLoad(builder, scalar_ptr, "");
res = lp_build_broadcast_scalar(bld_broad, scalar);
}
- if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
+ if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
@@ -1399,7 +1413,7 @@ emit_fetch_immediate(
}
else {
res = bld->immediates[reg->Register.Index][swizzle];
- if (stype == TGSI_TYPE_DOUBLE)
+ if (tgsi_type_is64bit(stype))
res = emit_fetch_double(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
}
@@ -1409,6 +1423,10 @@ emit_fetch_immediate(
res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
} else if (stype == TGSI_TYPE_DOUBLE) {
res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
+ } else if (stype == TGSI_TYPE_UNSIGNED64) {
+ res = LLVMBuildBitCast(builder, res, bld_base->uint64_bld.vec_type, "");
+ } else if (stype == TGSI_TYPE_SIGNED64) {
+ res = LLVMBuildBitCast(builder, res, bld_base->int64_bld.vec_type, "");
}
return res;
}
@@ -1441,7 +1459,7 @@ emit_fetch_input(
indirect_index,
swizzle,
TRUE);
- if (stype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(stype)) {
index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
swizzle + 1,
@@ -1461,7 +1479,7 @@ emit_fetch_input(
bld->inputs_array, &lindex, 1, "");
res = LLVMBuildLoad(builder, input_ptr, "");
- if (stype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(stype)) {
LLVMValueRef lindex1;
LLVMValueRef input_ptr2;
LLVMValueRef res2;
@@ -1476,7 +1494,7 @@ emit_fetch_input(
}
else {
res = bld->inputs[reg->Register.Index][swizzle];
- if (stype == TGSI_TYPE_DOUBLE)
+ if (tgsi_type_is64bit(stype))
res = emit_fetch_double(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
}
}
@@ -1489,6 +1507,10 @@ emit_fetch_input(
res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
} else if (stype == TGSI_TYPE_DOUBLE) {
res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
+ } else if (stype == TGSI_TYPE_UNSIGNED64) {
+ res = LLVMBuildBitCast(builder, res, bld_base->uint64_bld.vec_type, "");
+ } else if (stype == TGSI_TYPE_SIGNED64) {
+ res = LLVMBuildBitCast(builder, res, bld_base->int64_bld.vec_type, "");
}
return res;
@@ -1548,7 +1570,7 @@ emit_fetch_gs_input(
swizzle_index);
assert(res);
- if (stype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(stype)) {
LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
LLVMValueRef res2;
res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
@@ -1595,7 +1617,7 @@ emit_fetch_temporary(
indirect_index,
swizzle,
TRUE);
- if (stype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(stype)) {
index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
swizzle + 1,
@@ -1614,7 +1636,7 @@ emit_fetch_temporary(
temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
res = LLVMBuildLoad(builder, temp_ptr, "");
- if (stype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(stype)) {
LLVMValueRef temp_ptr2, res2;
temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
@@ -1623,7 +1645,11 @@ emit_fetch_temporary(
}
}
- if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
+ if (stype == TGSI_TYPE_SIGNED ||
+ stype == TGSI_TYPE_UNSIGNED ||
+ stype == TGSI_TYPE_DOUBLE ||
+ stype == TGSI_TYPE_SIGNED64 ||
+ stype == TGSI_TYPE_UNSIGNED64) {
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
@@ -1912,7 +1938,7 @@ emit_store_chan(
LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
chan_index);
- if (dtype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(dtype)) {
LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
chan_index + 1);
emit_store_double_chan(bld_base, dtype, out_ptr, out_ptr2,
@@ -1924,7 +1950,7 @@ emit_store_chan(
case TGSI_FILE_TEMPORARY:
/* Temporaries are always stored as floats */
- if (dtype != TGSI_TYPE_DOUBLE)
+ if (!tgsi_type_is64bit(dtype))
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
else
value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
@@ -1950,7 +1976,7 @@ emit_store_chan(
LLVMValueRef temp_ptr;
temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
- if (dtype == TGSI_TYPE_DOUBLE) {
+ if (tgsi_type_is64bit(dtype)) {
LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
reg->Register.Index,
chan_index + 1);
@@ -2035,7 +2061,7 @@ emit_store(
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
+ if (tgsi_type_is64bit(dtype) && (chan_index == 1 || chan_index == 3))
continue;
emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
}
@@ -3052,6 +3078,8 @@ void lp_emit_immediate_soa(
break;
case TGSI_IMM_FLOAT64:
+ case TGSI_IMM_UINT64:
+ case TGSI_IMM_INT64:
case TGSI_IMM_UINT32:
for( i = 0; i < size; ++i ) {
LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
@@ -3909,6 +3937,18 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
dbl_type.width *= 2;
lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
}
+ {
+ struct lp_type uint64_type;
+ uint64_type = lp_uint_type(type);
+ uint64_type.width *= 2;
+ lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
+ }
+ {
+ struct lp_type int64_type;
+ int64_type = lp_int_type(type);
+ int64_type.width *= 2;
+ lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
+ }
bld.mask = mask;
bld.inputs = inputs;
bld.outputs = outputs;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 2eaa09a..e88a884 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -103,6 +103,15 @@ enum tgsi_opcode_type {
TGSI_TYPE_SIGNED64,
};
+
+static inline bool tgsi_type_is64bit(enum tgsi_opcode_type type)
+{
+ if (type == TGSI_TYPE_DOUBLE || type == TGSI_TYPE_UNSIGNED64 ||
+ type == TGSI_TYPE_SIGNED64)
+ return true;
+ return false;
+}
+
enum tgsi_opcode_type
tgsi_opcode_infer_src_type( uint opcode );
--
2.5.5