[Mesa-dev] [PATCH v2 4/6] gallivm/llvmpipe: prepare support for ARB_gpu_shader_int64.
Roland Scheidegger
sroland at vmware.com
Mon Sep 19 16:25:31 UTC 2016
Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle:
> From: Dave Airlie <airlied at redhat.com>
>
> This enables 64-bit integer support in gallivm and
> llvmpipe.
>
> v2: add conversion opcodes.
> v3:
> - PIPE_CAP_INT64 is not there yet
> - restrict DIV/MOD defaults to the CPU, as for 32 bits
> - TGSI_OPCODE_I2U64 becomes TGSI_OPCODE_U2I64
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 2 +
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 4 +
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 471 +++++++++++++++++++++
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 40 +-
> src/gallium/auxiliary/tgsi/tgsi_info.h | 3 +-
> 5 files changed, 515 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> index 1ef6ae4..b397261 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -357,20 +357,22 @@ lp_build_emit_fetch(
> if (reg->Register.Absolute) {
> switch (stype) {
> case TGSI_TYPE_FLOAT:
> case TGSI_TYPE_DOUBLE:
> case TGSI_TYPE_UNTYPED:
> /* modifiers on movs assume data is float */
> res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res);
> break;
> case TGSI_TYPE_UNSIGNED:
> case TGSI_TYPE_SIGNED:
> + case TGSI_TYPE_UNSIGNED64:
> + case TGSI_TYPE_SIGNED64:
> case TGSI_TYPE_VOID:
> default:
> /* abs modifier is only legal on floating point types */
> assert(0);
> break;
> }
> }
>
> if (reg->Register.Negate) {
> switch (stype) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index de1150c..b6b3fe3 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -330,20 +330,24 @@ typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *,
> unsigned);
>
> struct lp_build_tgsi_context
> {
> struct lp_build_context base;
>
> struct lp_build_context uint_bld;
> struct lp_build_context int_bld;
>
> struct lp_build_context dbl_bld;
> +
> + struct lp_build_context uint64_bld;
> + struct lp_build_context int64_bld;
> +
> /** This array stores functions that are used to transform TGSI opcodes to
> * LLVM instructions.
> */
> struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST];
>
> /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
> * should compute 1 / sqrt (src0.x) */
> struct lp_build_tgsi_action rsq_action;
>
> struct lp_build_tgsi_action sqrt_action;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 1ee9704..d924770 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -1086,20 +1086,230 @@ static void dfrac_emit(
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> {
> LLVMValueRef tmp;
> tmp = lp_build_floor(&bld_base->dbl_bld,
> emit_data->args[0]);
> emit_data->output[emit_data->chan] = LLVMBuildFSub(bld_base->base.gallivm->builder,
> emit_data->args[0], tmp, "");
> }
>
> +/* TGSI_OPCODE_U64MUL */
> +static void
> +u64mul_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_U64MOD */
> +static void
> +u64mod_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
> + PIPE_FUNC_EQUAL, emit_data->args[1],
> + bld_base->uint64_bld.zero);
> + /* We want to make sure that we never divide/mod by zero to not
> + * generate sigfpe. We don't want to crash just because the
> + * shader is doing something weird. */
> + LLVMValueRef divisor = LLVMBuildOr(builder,
> + div_mask,
> + emit_data->args[1], "");
> + LLVMValueRef result = lp_build_mod(&bld_base->uint64_bld,
> + emit_data->args[0], divisor);
> + /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
> + emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
> + div_mask,
> + result, "");
> +}
> +
> +/* TGSI_OPCODE_MOD (CPU Only) */
[comment above should say TGSI_OPCODE_I64MOD]
> +static void
> +i64mod_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
> + PIPE_FUNC_EQUAL, emit_data->args[1],
> + bld_base->uint64_bld.zero);
> + /* We want to make sure that we never divide/mod by zero to not
> + * generate sigfpe. We don't want to crash just because the
> + * shader is doing something weird. */
> + LLVMValueRef divisor = LLVMBuildOr(builder,
> + div_mask,
> + emit_data->args[1], "");
> + LLVMValueRef result = lp_build_mod(&bld_base->int64_bld,
> + emit_data->args[0], divisor);
> + /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
> + emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
> + div_mask,
> + result, "");
> +}
> +
> +/* TGSI_OPCODE_U64DIV */
> +static void
> +u64div_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> +
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
> + PIPE_FUNC_EQUAL, emit_data->args[1],
> + bld_base->uint64_bld.zero);
> + /* We want to make sure that we never divide/mod by zero to not
> + * generate sigfpe. We don't want to crash just because the
> + * shader is doing something weird. */
> + LLVMValueRef divisor = LLVMBuildOr(builder,
> + div_mask,
> + emit_data->args[1], "");
> + LLVMValueRef result = LLVMBuildUDiv(builder,
> + emit_data->args[0], divisor, "");
> + /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
> + emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
> + div_mask,
> + result, "");
> +}
> +
> +/* TGSI_OPCODE_I64DIV */
> +static void
> +i64div_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> +
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef div_mask = lp_build_cmp(&bld_base->int64_bld,
> + PIPE_FUNC_EQUAL, emit_data->args[1],
> + bld_base->int64_bld.zero);
> + /* We want to make sure that we never divide/mod by zero to not
> + * generate sigfpe. We don't want to crash just because the
> + * shader is doing something weird. */
> + LLVMValueRef divisor = LLVMBuildOr(builder,
> + div_mask,
> + emit_data->args[1], "");
> + LLVMValueRef result = LLVMBuildSDiv(builder,
> + emit_data->args[0], divisor, "");
> + /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
> + emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
> + div_mask,
> + result, "");
> +}
> +
> +/* TGSI_OPCODE_F2U */
[comment above should say TGSI_OPCODE_F2U64]
> +static void
> +f2u64_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildFPToUI(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->uint64_bld.vec_type, "");
> +}
> +
> +static void
> +f2i64_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildFPToSI(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->int64_bld.vec_type, "");
> +}
> +
> +static void
> +u2i64_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildZExt(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->uint64_bld.vec_type, "");
> +}
> +
> +static void
> +i2i64_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSExt(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->int64_bld.vec_type, "");
> +}
> +
> +static void
> +i642f_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSIToFP(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->base.vec_type, "");
> +}
> +
> +static void
> +u642f_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildUIToFP(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->base.vec_type, "");
> +}
> +
> +static void
> +i642d_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSIToFP(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->dbl_bld.vec_type, "");
> +}
> +
> +static void
> +u642d_emit(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> + LLVMBuildUIToFP(bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + bld_base->dbl_bld.vec_type, "");
> +}
> +
> void
> lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
> {
> bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
> bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
> bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
> bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
> bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
> bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
> bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
> @@ -1161,20 +1371,40 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
>
> bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
> bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
> bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
>
> bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
>
> bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
> bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
>
> + bld_base->op_actions[TGSI_OPCODE_U64MUL].emit = u64mul_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_F2I64].emit = f2i64_emit;
> + bld_base->op_actions[TGSI_OPCODE_F2U64].emit = f2u64_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_D2I64].emit = f2i64_emit;
> + bld_base->op_actions[TGSI_OPCODE_D2U64].emit = f2u64_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_I2I64].emit = i2i64_emit;
> + bld_base->op_actions[TGSI_OPCODE_U2I64].emit = u2i64_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
> + bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
> + bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
> +
> + bld_base->op_actions[TGSI_OPCODE_I642D].emit = i642d_emit;
> + bld_base->op_actions[TGSI_OPCODE_U642D].emit = u642d_emit;
> +
> }
>
> /* CPU Only default actions */
>
> /* These actions are CPU only, because they could potentially output SSE
> * intrinsics.
> */
>
> /* TGSI_OPCODE_ABS (CPU Only)*/
>
> @@ -2133,20 +2363,236 @@ drecip_sqrt_emit_cpu(
> static void
> dsqrt_emit_cpu(
> const struct lp_build_tgsi_action * action,
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> {
> emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
> emit_data->args[0]);
> }
>
> +static void
> +i64abs_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int64_bld,
> + emit_data->args[0]);
> +}
> +
> +static void
> +i64ssg_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int64_bld,
> + emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_INEG (CPU Only) */
[comment above should say TGSI_OPCODE_I64NEG]
> +static void
> +i64neg_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int64_bld,
> + bld_base->int64_bld.zero,
> + emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_DSET Helper (CPU Only) */
> +static void
> +u64set_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data,
> + unsigned pipe_func)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef cond = lp_build_cmp(&bld_base->uint64_bld, pipe_func,
> + emit_data->args[0], emit_data->args[1]);
> + /* arguments were 64 bit but store as 32 bit */
> + cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
> + emit_data->output[emit_data->chan] = cond;
> +}
> +
> +static void
> +u64seq_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
> +}
> +
> +static void
> +u64sne_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
> +}
> +
> +static void
> +u64slt_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
> +}
> +
> +static void
> +u64sge_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
> +}
> +
> +static void
> +i64set_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data,
> + unsigned pipe_func)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef cond = lp_build_cmp(&bld_base->int64_bld, pipe_func,
> + emit_data->args[0], emit_data->args[1]);
> + /* arguments were 64 bit but store as 32 bit */
> + cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
> + emit_data->output[emit_data->chan] = cond;
> +}
> +
> +static void
> +i64slt_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
> +}
> +
> +static void
> +i64sge_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
> +}
> +
> +static void
> +u64max_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_UMIN (CPU Only) */
[comment above should say TGSI_OPCODE_U64MIN]
> +static void
> +u64min_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +static void
> +i64max_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_UMIN (CPU Only) */
[comment above should say TGSI_OPCODE_I64MIN]
> +static void
> +i64min_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_U64ADD (CPU Only) */
> +static void
> +u64add_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint64_bld,
> + emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_U64SHL (CPU Only) */
> +static void
> +u64shl_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context *uint_bld = &bld_base->uint64_bld;
> + LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
> + uint_bld->type.width - 1);
> + LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
> + emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
> + masked_count);
> +}
> +
> +/* TGSI_OPCODE_I64SHR (CPU Only) */
> +static void
> +i64shr_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context *int_bld = &bld_base->int64_bld;
> + LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
> + int_bld->type.width - 1);
> + LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
> + emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
> + masked_count);
> +}
> +
> +/* TGSI_OPCODE_U64SHR (CPU Only) */
> +static void
> +u64shr_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context *uint_bld = &bld_base->uint64_bld;
> + LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
> + uint_bld->type.width - 1);
> + LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
> + emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
> + masked_count);
> +}
> +
> +/* TGSI_OPCODE_UDIV (CPU Only) */
> void
> lp_set_default_actions_cpu(
> struct lp_build_tgsi_context * bld_base)
> {
> lp_set_default_actions(bld_base);
> bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_AND].emit = and_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu;
> @@ -2216,11 +2662,36 @@ lp_set_default_actions_cpu(
> bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
>
> bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
>
> + bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = i64abs_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = i64ssg_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = i64neg_emit_cpu;
> +
> + bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = u64seq_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = u64sne_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = u64slt_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = u64sge_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = i64slt_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = i64sge_emit_cpu;
> +
> + bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = u64min_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = u64max_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = i64min_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = i64max_emit_cpu;
> +
> + bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = u64add_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = u64mod_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = i64mod_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = u64div_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = i64div_emit_cpu;
> +
> + bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = u64shl_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = i64shr_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = u64shr_emit_cpu;
> }
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 5b76733..6871795 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1161,20 +1161,26 @@ stype_to_fetch(struct lp_build_tgsi_context * bld_base,
> break;
> case TGSI_TYPE_UNSIGNED:
> bld_fetch = &bld_base->uint_bld;
> break;
> case TGSI_TYPE_SIGNED:
> bld_fetch = &bld_base->int_bld;
> break;
> case TGSI_TYPE_DOUBLE:
> bld_fetch = &bld_base->dbl_bld;
> break;
> + case TGSI_TYPE_UNSIGNED64:
> + bld_fetch = &bld_base->uint64_bld;
> + break;
> + case TGSI_TYPE_SIGNED64:
> + bld_fetch = &bld_base->int64_bld;
> + break;
> case TGSI_TYPE_VOID:
> default:
> assert(0);
> bld_fetch = NULL;
> break;
> }
> return bld_fetch;
> }
>
> static LLVMValueRef
> @@ -1278,26 +1284,34 @@ emit_fetch_constant(
> LLVMValueRef scalar, scalar_ptr;
> struct lp_build_context *bld_broad = &bld_base->base;
> index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
>
> scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
> &index, 1, "");
> if (stype == TGSI_TYPE_DOUBLE) {
> LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
> scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
> bld_broad = &bld_base->dbl_bld;
> + } else if (stype == TGSI_TYPE_UNSIGNED64) {
> + LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
> + scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
> + bld_broad = &bld_base->uint64_bld;
> + } else if (stype == TGSI_TYPE_SIGNED64) {
> + LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
> + scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
> + bld_broad = &bld_base->int64_bld;
> }
> scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> res = lp_build_broadcast_scalar(bld_broad, scalar);
> }
>
> - if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
> + if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
> struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
> res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
> }
>
> return res;
> }
>
> /**
> * Fetch 64-bit values from two separate channels.
> * 64-bit values are stored split across two channels, like xy and zw.
> @@ -1396,21 +1410,21 @@ emit_fetch_immediate(
> res = emit_fetch_64bit(bld_base, stype, res, res2);
> }
> }
> }
> else {
> res = bld->immediates[reg->Register.Index][swizzle];
> if (tgsi_type_is_64bit(stype))
> res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
> }
>
> - if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
> + if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
> struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
> res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
> }
> return res;
> }
>
> static LLVMValueRef
> emit_fetch_input(
> struct lp_build_tgsi_context * bld_base,
> const struct tgsi_full_src_register * reg,
> @@ -1473,21 +1487,21 @@ emit_fetch_input(
> }
> else {
> res = bld->inputs[reg->Register.Index][swizzle];
> if (tgsi_type_is_64bit(stype))
> res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
> }
> }
>
> assert(res);
>
> - if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
> + if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
> struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
> res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
> }
>
> return res;
> }
>
>
> static LLVMValueRef
> emit_fetch_gs_input(
> @@ -1610,21 +1624,25 @@ emit_fetch_temporary(
>
> if (tgsi_type_is_64bit(stype)) {
> LLVMValueRef temp_ptr2, res2;
>
> temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
> res2 = LLVMBuildLoad(builder, temp_ptr2, "");
> res = emit_fetch_64bit(bld_base, stype, res, res2);
> }
> }
>
> - if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
> + if (stype == TGSI_TYPE_SIGNED ||
> + stype == TGSI_TYPE_UNSIGNED ||
> + stype == TGSI_TYPE_DOUBLE ||
> + stype == TGSI_TYPE_SIGNED64 ||
> + stype == TGSI_TYPE_UNSIGNED64) {
> struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
> res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
> }
>
> return res;
> }
>
> static LLVMValueRef
> emit_fetch_system_value(
> struct lp_build_tgsi_context * bld_base,
> @@ -3038,20 +3056,22 @@ void lp_emit_immediate_soa(
> const uint size = imm->Immediate.NrTokens - 1;
> assert(size <= 4);
> switch (imm->Immediate.DataType) {
> case TGSI_IMM_FLOAT32:
> for( i = 0; i < size; ++i )
> imms[i] =
> lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
>
> break;
> case TGSI_IMM_FLOAT64:
> + case TGSI_IMM_UINT64:
> + case TGSI_IMM_INT64:
> case TGSI_IMM_UINT32:
> for( i = 0; i < size; ++i ) {
> LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
> imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
> }
>
> break;
> case TGSI_IMM_INT32:
> for( i = 0; i < size; ++i ) {
> LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
> @@ -3895,20 +3915,32 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld.bld_base.base, gallivm, type);
> lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
> lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
> lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
> {
> struct lp_type dbl_type;
> dbl_type = type;
> dbl_type.width *= 2;
> lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
> }
> + {
> + struct lp_type uint64_type;
> + uint64_type = lp_uint_type(type);
> + uint64_type.width *= 2;
> + lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
> + }
> + {
> + struct lp_type int64_type;
> + int64_type = lp_int_type(type);
> + int64_type.width *= 2;
> + lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
> + }
> bld.mask = mask;
> bld.inputs = inputs;
> bld.outputs = outputs;
> bld.consts_ptr = consts_ptr;
> bld.const_sizes_ptr = const_sizes_ptr;
> bld.sampler = sampler;
> bld.bld_base.info = info;
> bld.indirect_files = info->indirect_files;
> bld.context_ptr = context_ptr;
> bld.thread_data_ptr = thread_data_ptr;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
> index 8830f5a..e60888f 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
> @@ -98,21 +98,22 @@ enum tgsi_opcode_type {
> TGSI_TYPE_UNSIGNED,
> TGSI_TYPE_SIGNED,
> TGSI_TYPE_FLOAT,
> TGSI_TYPE_DOUBLE,
> TGSI_TYPE_UNSIGNED64,
> TGSI_TYPE_SIGNED64,
> };
>
> static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type)
> {
> - if (type == TGSI_TYPE_DOUBLE)
> + if (type == TGSI_TYPE_DOUBLE || type == TGSI_TYPE_UNSIGNED64 ||
> + type == TGSI_TYPE_SIGNED64)
> return true;
> return false;
> }
>
> enum tgsi_opcode_type
> tgsi_opcode_infer_src_type( uint opcode );
>
> enum tgsi_opcode_type
> tgsi_opcode_infer_dst_type( uint opcode );
>
>
There are a couple of bogus comments with respect to opcode names, marked
above — either fix them up or drop the comments.
Otherwise, for patches 2-4,
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
More information about the mesa-dev
mailing list