[Mesa-dev] [PATCH v2 4/6] gallivm/llvmpipe: prepare support for ARB_gpu_shader_int64.
Roland Scheidegger
sroland at vmware.com
Mon Sep 19 16:25:31 UTC 2016
Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle:
> From: Dave Airlie <airlied at redhat.com>
>
> This enables 64-bit integer support in gallivm and
> llvmpipe.
>
> v2: add conversion opcodes.
> v3:
> - PIPE_CAP_INT64 is not there yet
> - restrict DIV/MOD defaults to the CPU, as for 32 bits
> - TGSI_OPCODE_I2U64 becomes TGSI_OPCODE_U2I64
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 2 +
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 4 +
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 471 +++++++++++++++++++++
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 40 +-
> src/gallium/auxiliary/tgsi/tgsi_info.h | 3 +-
> 5 files changed, 515 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> index 1ef6ae4..b397261 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -357,20 +357,22 @@ lp_build_emit_fetch(
> if (reg->Register.Absolute) {
> switch (stype) {
> case TGSI_TYPE_FLOAT:
> case TGSI_TYPE_DOUBLE:
> case TGSI_TYPE_UNTYPED:
> /* modifiers on movs assume data is float */
> res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res);
> break;
> case TGSI_TYPE_UNSIGNED:
> case TGSI_TYPE_SIGNED:
> + case TGSI_TYPE_UNSIGNED64:
> + case TGSI_TYPE_SIGNED64:
> case TGSI_TYPE_VOID:
> default:
> /* abs modifier is only legal on floating point types */
> assert(0);
> break;
> }
> }
>
> if (reg->Register.Negate) {
> switch (stype) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index de1150c..b6b3fe3 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -330,20 +330,24 @@ typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *,
> unsigned);
>
> struct lp_build_tgsi_context
> {
> struct lp_build_context base;
>
> struct lp_build_context uint_bld;
> struct lp_build_context int_bld;
>
> struct lp_build_context dbl_bld;
> +
> + struct lp_build_context uint64_bld;
> + struct lp_build_context int64_bld;
> +
> /** This array stores functions that are used to transform TGSI opcodes to
> * LLVM instructions.
> */
> struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST];
>
> /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
> * should compute 1 / sqrt (src0.x) */
> struct lp_build_tgsi_action rsq_action;
>
> struct lp_build_tgsi_action sqrt_action;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 1ee9704..d924770 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -1086,20 +1086,230 @@ static void dfrac_emit(
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> {
> LLVMValueRef tmp;
> tmp = lp_build_floor(&bld_base->dbl_bld,
> emit_data->args[0]);
> emit_data->output[emit_data->chan] = LLVMBuildFSub(bld_base->base.gallivm->builder,
> emit_data->args[0], tmp, "");
> }
>
> +/* TGSI_OPCODE_U64MUL */
> +static void
> +u64mul_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_U64MOD */
> +static void
> +u64mod_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
> + PIPE_FUNC_EQUAL, emit_data->args[1],
> + bld_base->uint64_bld.zero);
> + /* We want to make sure that we never divide/mod by zero to not
> + * generate sigfpe. We don't want to crash just because the
> + * shader is doing something weird. */
> + LLVMValueRef divisor = LLVMBuildOr(builder,
> + div_mask,
> + emit_data->args[1], "");
> + LLVMValueRef result = lp_build_mod(&bld_base->uint64_bld,
> + emit_data->args[0], divisor);
> + /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
> + emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
> + div_mask,
> + result, "");
> +}
> +
> +/* TGSI_OPCODE_MOD (CPU Only) */
[comment above should say TGSI_OPCODE_I64MOD]
> +static void
> +i64mod_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
> + PIPE_FUNC_EQUAL, emit_data->args[1],
> + bld_base->uint64_bld.zero);
> + /* We want to make sure that we never divide/mod by zero to not
> + * generate sigfpe. We don't want to crash just because the
> + * shader is doing something weird. */
> + LLVMValueRef divisor = LLVMBuildOr(builder,
> + div_mask,
> + emit_data->args[1], "");
> + LLVMValueRef result = lp_build_mod(&bld_base->int64_bld,
> + emit_data->args[0], divisor);
> + /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
> + emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
> + div_mask,
> + result, "");
> +}
> +
> +/* TGSI_OPCODE_U64DIV */
> +static void
> +u64div_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> +
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
> + PIPE_FUNC_EQUAL, emit_data->args[1],
> + bld_base->uint64_bld.zero);
> + /* We want to make sure that we never divide/mod by zero to not
> + * generate sigfpe. We don't want to crash just because the
> + * shader is doing something weird. */
> + LLVMValueRef divisor = LLVMBuildOr(builder,
> + div_mask,
> + emit_data->args[1], "");
> + LLVMValueRef result = LLVMBuildUDiv(builder,
> + emit_data->args[0], divisor, "");
> + /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
> + emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
> + div_mask,
> + result, "");
> +}
> +
> +/* TGSI_OPCODE_I64DIV */
> +static void
> +i64div_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> +
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef div_mask = lp_build_cmp(&bld_base->int64_bld,
> + PIPE_FUNC_EQUAL, emit_data->args[1],
> + bld_base->int64_bld.zero);
> + /* We want to make sure that we never divide/mod by zero to not
> + * generate sigfpe. We don't want to crash just because the
> + * shader is doing something weird. */
> + LLVMValueRef divisor = LLVMBuildOr(builder,
> + div_mask,
> + emit_data->args[1], "");
> + LLVMValueRef result = LLVMBuildSDiv(builder,
> + emit_data->args[0], divisor, "");
> + /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
> + emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
> + div_mask,
> + result, "");
> +}
> +
> +/* TGSI_OPCODE_F2U */
[comment above should say TGSI_OPCODE_F2U64]
> +static void
> +f2u64_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildFPToUI(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->uint64_bld.vec_type, "");
> +}
> +
> +static void
> +f2i64_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildFPToSI(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->int64_bld.vec_type, "");
> +}
> +
> +static void
> +u2i64_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildZExt(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->uint64_bld.vec_type, "");
> +}
> +
> +static void
> +i2i64_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSExt(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->int64_bld.vec_type, "");
> +}
> +
> +static void
> +i642f_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSIToFP(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->base.vec_type, "");
> +}
> +
> +static void
> +u642f_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildUIToFP(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->base.vec_type, "");
> +}
> +
> +static void
> +i642d_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSIToFP(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->dbl_bld.vec_type, "");
> +}
> +
> +static void
> +u642d_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildUIToFP(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->dbl_bld.vec_type, "");
> +}
> +
> void
> lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
> {
> bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
> bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
> bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
> bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
> bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
> bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
> bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
> @@ -1161,20 +1371,40 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
>
> bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
> bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
> bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
>
> bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
>
> bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
> bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
>
> + bld_base->op_actions[TGSI_OPCODE_U64MUL].emit = u64mul_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_F2I64].emit = f2i64_emit;
> + bld_base->op_actions[TGSI_OPCODE_F2U64].emit = f2u64_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_D2I64].emit = f2i64_emit;
> + bld_base->op_actions[TGSI_OPCODE_D2U64].emit = f2u64_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_I2I64].emit = i2i64_emit;
> + bld_base->op_actions[TGSI_OPCODE_U2I64].emit = u2i64_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
> + bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
> + bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_I642D].emit = i642d_emit;
> + bld_base->op_actions[TGSI_OPCODE_U642D].emit = u642d_emit;
> +
> }
>
> /* CPU Only default actions */
>
> /* These actions are CPU only, because they could potentially output SSE
> * intrinsics.
> */
>
> /* TGSI_OPCODE_ABS (CPU Only)*/
>
> @@ -2133,20 +2363,236 @@ drecip_sqrt_emit_cpu(
> static void
> dsqrt_emit_cpu(
> const struct lp_build_tgsi_action * action,
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> {
> emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
> emit_data->args[0]);
> }
>
> +static void
> +i64abs_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int64_bld,
> + emit_data->args[0]);
> +}
> +
> +static void
> +i64ssg_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int64_bld,
> + emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_INEG (CPU Only) */
[comment above should say TGSI_OPCODE_I64NEG]
> +static void
> +i64neg_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int64_bld,
> + bld_base->int64_bld.zero,
> + emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_DSET Helper (CPU Only) */
> +static void
> +u64set_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data,
> + unsigned pipe_func)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef cond = lp_build_cmp(&bld_base->uint64_bld, pipe_func,
> + emit_data->args[0], emit_data->args[1]);
> + /* arguments were 64 bit but store as 32 bit */
> + cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
> + emit_data->output[emit_data->chan] = cond;
> +}
> +
> +static void
> +u64seq_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
> +}
> +
> +static void
> +u64sne_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
> +}
> +
> +static void
> +u64slt_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
> +}
> +
> +static void
> +u64sge_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
> +}
> +
> +static void
> +i64set_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data,
> + unsigned pipe_func)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef cond = lp_build_cmp(&bld_base->int64_bld, pipe_func,
> + emit_data->args[0], emit_data->args[1]);
> + /* arguments were 64 bit but store as 32 bit */
> + cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
> + emit_data->output[emit_data->chan] = cond;
> +}
> +
> +static void
> +i64slt_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
> +}
> +
> +static void
> +i64sge_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
> +}
> +
> +static void
> +u64max_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_UMIN (CPU Only) */
[comment above should say TGSI_OPCODE_U64MIN]
> +static void
> +u64min_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +static void
> +i64max_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_UMIN (CPU Only) */
[comment above should say TGSI_OPCODE_I64MIN]
> +static void
> +i64min_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_U64ADD (CPU Only) */
> +static void
> +u64add_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_U64SHL (CPU Only) */
> +static void
> +u64shl_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context *uint_bld = &bld_base->uint64_bld;
> + LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
> + uint_bld->type.width - 1);
> + LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
> + emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
> + masked_count);
> +}
> +
> +/* TGSI_OPCODE_I64SHR (CPU Only) */
> +static void
> +i64shr_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context *int_bld = &bld_base->int64_bld;
> + LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
> + int_bld->type.width - 1);
> + LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
> + emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
> + masked_count);
> +}
> +
> +/* TGSI_OPCODE_U64SHR (CPU Only) */
> +static void
> +u64shr_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context *uint_bld = &bld_base->uint64_bld;
> + LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
> + uint_bld->type.width - 1);
> + LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
> + emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
> + masked_count);
> +}
> +
> +/* TGSI_OPCODE_UDIV (CPU Only) */
> void
> lp_set_default_actions_cpu(
> struct lp_build_tgsi_context * bld_base)
> {
> lp_set_default_actions(bld_base);
> bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_AND].emit = and_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu;
> @@ -2216,11 +2662,36 @@ lp_set_default_actions_cpu(
> bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
>
> bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
>
> + bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = i64abs_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = i64ssg_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = i64neg_emit_cpu;
> +
> + bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = u64seq_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = u64sne_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = u64slt_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = u64sge_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = i64slt_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = i64sge_emit_cpu;
> +
> + bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = u64min_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = u64max_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = i64min_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = i64max_emit_cpu;
> +
> + bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = u64add_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = u64mod_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = i64mod_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = u64div_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = i64div_emit_cpu;
> +
> + bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = u64shl_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = i64shr_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = u64shr_emit_cpu;
> }
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 5b76733..6871795 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1161,20 +1161,26 @@ stype_to_fetch(struct lp_build_tgsi_context * bld_base,
> break;
> case TGSI_TYPE_UNSIGNED:
> bld_fetch = &bld_base->uint_bld;
> break;
> case TGSI_TYPE_SIGNED:
> bld_fetch = &bld_base->int_bld;
> break;
> case TGSI_TYPE_DOUBLE:
> bld_fetch = &bld_base->dbl_bld;
> break;
> + case TGSI_TYPE_UNSIGNED64:
> + bld_fetch = &bld_base->uint64_bld;
> + break;
> + case TGSI_TYPE_SIGNED64:
> + bld_fetch = &bld_base->int64_bld;
> + break;
> case TGSI_TYPE_VOID:
> default:
> assert(0);
> bld_fetch = NULL;
> break;
> }
> return bld_fetch;
> }
>
> static LLVMValueRef
> @@ -1278,26 +1284,34 @@ emit_fetch_constant(
> LLVMValueRef scalar, scalar_ptr;
> struct lp_build_context *bld_broad = &bld_base->base;
> index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
>
> scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
> &index, 1, "");
> if (stype == TGSI_TYPE_DOUBLE) {
> LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
> scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
> bld_broad = &bld_base->dbl_bld;
> + } else if (stype == TGSI_TYPE_UNSIGNED64) {
> + LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
> + scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
> + bld_broad = &bld_base->uint64_bld;
> + } else if (stype == TGSI_TYPE_SIGNED64) {
> + LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
> + scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
> + bld_broad = &bld_base->int64_bld;
> }
> scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> res = lp_build_broadcast_scalar(bld_broad, scalar);
> }
>
> - if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
> + if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
> struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
> res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
> }
>
> return res;
> }
>
> /**
> * Fetch 64-bit values from two separate channels.
> * 64-bit values are stored split across two channels, like xy and zw.
> @@ -1396,21 +1410,21 @@ emit_fetch_immediate(
> res = emit_fetch_64bit(bld_base, stype, res, res2);
> }
> }
> }
> else {
> res = bld->immediates[reg->Register.Index][swizzle];
> if (tgsi_type_is_64bit(stype))
> res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
> }
>
> - if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
> + if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
> struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
> res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
> }
> return res;
> }
>
> static LLVMValueRef
> emit_fetch_input(
> struct lp_build_tgsi_context * bld_base,
> const struct tgsi_full_src_register * reg,
> @@ -1473,21 +1487,21 @@ emit_fetch_input(
> }
> else {
> res = bld->inputs[reg->Register.Index][swizzle];
> if (tgsi_type_is_64bit(stype))
> res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
> }
> }
>
> assert(res);
>
> - if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
> + if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
> struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
> res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
> }
>
> return res;
> }
>
>
> static LLVMValueRef
> emit_fetch_gs_input(
> @@ -1610,21 +1624,25 @@ emit_fetch_temporary(
>
> if (tgsi_type_is_64bit(stype)) {
> LLVMValueRef temp_ptr2, res2;
>
> temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
> res2 = LLVMBuildLoad(builder, temp_ptr2, "");
> res = emit_fetch_64bit(bld_base, stype, res, res2);
> }
> }
>
> - if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
> + if (stype == TGSI_TYPE_SIGNED ||
> + stype == TGSI_TYPE_UNSIGNED ||
> + stype == TGSI_TYPE_DOUBLE ||
> + stype == TGSI_TYPE_SIGNED64 ||
> + stype == TGSI_TYPE_UNSIGNED64) {
> struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
> res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
> }
>
> return res;
> }
>
> static LLVMValueRef
> emit_fetch_system_value(
> struct lp_build_tgsi_context * bld_base,
> @@ -3038,20 +3056,22 @@ void lp_emit_immediate_soa(
> const uint size = imm->Immediate.NrTokens - 1;
> assert(size <= 4);
> switch (imm->Immediate.DataType) {
> case TGSI_IMM_FLOAT32:
> for( i = 0; i < size; ++i )
> imms[i] =
> lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
>
> break;
> case TGSI_IMM_FLOAT64:
> + case TGSI_IMM_UINT64:
> + case TGSI_IMM_INT64:
> case TGSI_IMM_UINT32:
> for( i = 0; i < size; ++i ) {
> LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
> imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
> }
>
> break;
> case TGSI_IMM_INT32:
> for( i = 0; i < size; ++i ) {
> LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
> @@ -3895,20 +3915,32 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld.bld_base.base, gallivm, type);
> lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
> lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
> lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
> {
> struct lp_type dbl_type;
> dbl_type = type;
> dbl_type.width *= 2;
> lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
> }
> + {
> + struct lp_type uint64_type;
> + uint64_type = lp_uint_type(type);
> + uint64_type.width *= 2;
> + lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
> + }
> + {
> + struct lp_type int64_type;
> + int64_type = lp_int_type(type);
> + int64_type.width *= 2;
> + lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
> + }
> bld.mask = mask;
> bld.inputs = inputs;
> bld.outputs = outputs;
> bld.consts_ptr = consts_ptr;
> bld.const_sizes_ptr = const_sizes_ptr;
> bld.sampler = sampler;
> bld.bld_base.info = info;
> bld.indirect_files = info->indirect_files;
> bld.context_ptr = context_ptr;
> bld.thread_data_ptr = thread_data_ptr;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
> index 8830f5a..e60888f 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
> @@ -98,21 +98,22 @@ enum tgsi_opcode_type {
> TGSI_TYPE_UNSIGNED,
> TGSI_TYPE_SIGNED,
> TGSI_TYPE_FLOAT,
> TGSI_TYPE_DOUBLE,
> TGSI_TYPE_UNSIGNED64,
> TGSI_TYPE_SIGNED64,
> };
>
> static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type)
> {
> - if (type == TGSI_TYPE_DOUBLE)
> + if (type == TGSI_TYPE_DOUBLE || type == TGSI_TYPE_UNSIGNED64 ||
> + type == TGSI_TYPE_SIGNED64)
> return true;
> return false;
> }
>
> enum tgsi_opcode_type
> tgsi_opcode_infer_src_type( uint opcode );
>
> enum tgsi_opcode_type
> tgsi_opcode_infer_dst_type( uint opcode );
>
>
There are a couple of bogus comments with respect to opcode names, marked
above — either fix them up or drop the comments.
Otherwise, for patches 2-4,
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
More information about the mesa-dev
mailing list