[Mesa-dev] [PATCH 6/6] gallivm: Add a new interface for doing TGSI->LLVM conversions
Tom Stellard
tstellar at gmail.com
Sun Jan 15 18:38:52 PST 2012
From: Tom Stellard <thomas.stellard at amd.com>
lp_bld_tgsi_soa.c has been adapted to use this new interface, but
lp_bld_tgsi_aos.c has only been partially adapted, since nothing in
gallium currently uses it.
---
src/gallium/auxiliary/Makefile.sources | 2 +
src/gallium/auxiliary/gallivm/lp_bld_action.c | 1182 ++++++++++++++
src/gallium/auxiliary/gallivm/lp_bld_action.h | 138 ++
src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 409 +++++
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 341 ++++-
src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 551 +++----
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 1981 ++++++++---------------
7 files changed, 2952 insertions(+), 1652 deletions(-)
create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_action.c
create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_action.h
create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index f55a4eb..547f63d 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -155,6 +155,7 @@ GENERATED_SOURCES := \
util/u_half.c
GALLIVM_SOURCES := \
+ gallivm/lp_bld_action.c \
gallivm/lp_bld_arit.c \
gallivm/lp_bld_assert.c \
gallivm/lp_bld_bitarit.c \
@@ -176,6 +177,7 @@ GALLIVM_SOURCES := \
gallivm/lp_bld_sample_soa.c \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
+ gallivm/lp_bld_tgsi.c \
gallivm/lp_bld_tgsi_aos.c \
gallivm/lp_bld_tgsi_info.c \
gallivm/lp_bld_tgsi_soa.c \
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_action.c b/src/gallium/auxiliary/gallivm/lp_bld_action.c
new file mode 100644
index 0000000..0b6cc77
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_action.c
@@ -0,0 +1,1182 @@
+/**************************************************************************
+ *
+ * Copyright 2010-2011 Advanced Micro Devices, Inc.
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * TGSI to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca at vmware.com>
+ * @author Tom Stellard <thomas.stellard at amd.com>
+ *
+ * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
+ * Brian Paul, and others.
+ */
+
+
+#include "lp_bld_action.h"
+
+#include "lp_bld_tgsi.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_const.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_logic.h"
+
+#include "tgsi/tgsi_exec.h"
+
+/* XXX: The CPU only defaults should be replaced by generic ones. In most
+ * cases, the CPU defaults are just wrappers around a function in
+ * lp_bld_arit.c and these functions should be inlined here and the CPU
+ * generic code should be removed and placed elsewhere.
+ */
+
+/* Default actions */
+
+/* Generic fetch_arg functions */
+
+/**
+ * Fetch arguments for opcodes that read only channel 0 (x) of source 0.
+ *
+ * Stores the fetched value in args[0], sets arg_count to 1 and takes
+ * dst_type from the LLVM type of the fetched value.
+ */
+static void
+scalar_unary_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef src0_x;
+
+   src0_x = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
+
+   emit_data->args[0] = src0_x;
+   emit_data->dst_type = LLVMTypeOf(src0_x);
+   emit_data->arg_count = 1;
+}
+
+/**
+ * Fetch arguments for opcodes that read the x channel of sources 0 and 1.
+ *
+ * args[0] = src0.x, args[1] = src1.x; arg_count is set to 2 and dst_type
+ * is taken from the LLVM type of args[0].
+ */
+static void
+scalar_binary_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef src0_x;
+   LLVMValueRef src1_x;
+
+   src0_x = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+   src1_x = lp_build_emit_fetch(bld_base, emit_data->inst, 1, TGSI_CHAN_X);
+
+   emit_data->args[0] = src0_x;
+   emit_data->args[1] = src1_x;
+   emit_data->dst_type = LLVMTypeOf(src0_x);
+   emit_data->arg_count = 2;
+}
+
+/* TGSI_OPCODE_ADD */
+/**
+ * Generic ADD: emits an LLVM FAdd of args[0] and args[1] and stores the
+ * result in output[chan].  The action parameter is unused.
+ */
+static void
+add_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+   LLVMValueRef sum;
+
+   sum = LLVMBuildFAdd(bld_base->base.gallivm->builder, lhs, rhs, "");
+   emit_data->output[emit_data->chan] = sum;
+}
+
+/* TGSI_OPCODE_ARR */
+/**
+ * ARR: forwards args[0] to the TGSI_OPCODE_ROUND action and stores the
+ * value it returns in output[chan].
+ */
+static void
+arr_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef rounded;
+
+   rounded = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ROUND,
+                                      emit_data->args[0]);
+   emit_data->output[emit_data->chan] = rounded;
+}
+
+/* TGSI_OPCODE_CLAMP */
+/**
+ * CLAMP: output[chan] = MIN(MAX(args[0], args[1]), args[2]), built from
+ * the TGSI_OPCODE_MAX and TGSI_OPCODE_MIN actions.
+ */
+static void
+clamp_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef lower_bounded;
+   LLVMValueRef clamped;
+
+   lower_bounded = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
+                                             emit_data->args[0],
+                                             emit_data->args[1]);
+   clamped = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
+                                       lower_bounded, emit_data->args[2]);
+   emit_data->output[emit_data->chan] = clamped;
+}
+
+/* DP* Helper */
+
+/**
+ * Shared argument fetch for the dot-product style opcodes.
+ *
+ * Fetches the first dp_components channels of source 0 followed by the
+ * same channels of source 1, so that
+ *    args[0 .. dp_components-1]               = src0 channels
+ *    args[dp_components .. 2*dp_components-1] = src1 channels
+ * dst_type is set to the base context's elem_type.  arg_count is not
+ * touched here.
+ */
+static void
+dp_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data,
+   unsigned dp_components)
+{
+   unsigned src_idx, component;
+   unsigned slot = 0;
+
+   for (src_idx = 0; src_idx < 2; ++src_idx) {
+      for (component = 0; component < dp_components; ++component, ++slot) {
+         emit_data->args[slot] = lp_build_emit_fetch(bld_base,
+                                                     emit_data->inst,
+                                                     src_idx, component);
+      }
+   }
+   emit_data->dst_type = bld_base->base.elem_type;
+}
+
+/* TGSI_OPCODE_DP2 */
+/**
+ * DP2 argument fetch: two components from each of the two sources
+ * (args[0..1] = src0.xy, args[2..3] = src1.xy).
+ */
+static void
+dp2_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   const unsigned components = 2;
+
+   dp_fetch_args(bld_base, emit_data, components);
+}
+
+/**
+ * DP2: output[chan] = src0.x * src1.x + src0.y * src1.y.
+ *
+ * Expects the argument layout produced by dp_fetch_args() with two
+ * components: args[0..1] = src0.xy, args[2..3] = src1.xy.
+ */
+static void
+dp2_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef prod_x, prod_y;
+
+   /* src0.x * src1.x */
+   prod_x = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[0],
+                                      emit_data->args[2]);
+   /* src0.y * src1.y */
+   prod_y = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[1],
+                                      emit_data->args[3]);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, prod_x, prod_y);
+}
+
+/* Default TGSI_OPCODE_DP2 action: dp2_fetch_args paired with dp2_emit.
+ * Installed by lp_set_default_actions(). */
+static struct lp_build_opcode_action dp2_action = {
+ .fetch_args = dp2_fetch_args,
+ .emit = dp2_emit
+};
+
+/* TGSI_OPCODE_DP2A */
+/**
+ * DP2A argument fetch: the DP2 layout in args[0..3] plus src2.x.
+ *
+ * src2.x is stored in args[5]; args[4] is not written here.  dp2a_emit()
+ * reads the addend from the same slot.
+ */
+static void
+dp2a_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef addend;
+
+   dp_fetch_args(bld_base, emit_data, 2);
+
+   addend = lp_build_emit_fetch(bld_base, emit_data->inst, 2, TGSI_CHAN_X);
+   emit_data->args[5] = addend;
+}
+
+/**
+ * DP2A: runs the TGSI_OPCODE_DP2 action on emit_data and adds the value
+ * it returns to args[5] (src2.x, see dp2a_fetch_args()).
+ */
+static void
+dp2a_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef dot;
+   LLVMValueRef sum;
+
+   dot = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
+   sum = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
+                                   emit_data->args[5], dot);
+   emit_data->output[emit_data->chan] = sum;
+}
+
+/* Default TGSI_OPCODE_DP2A action: dp2a_fetch_args paired with dp2a_emit.
+ * Installed by lp_set_default_actions(). */
+static struct lp_build_opcode_action dp2a_action = {
+ .fetch_args = dp2a_fetch_args,
+ .emit = dp2a_emit
+};
+
+/* TGSI_OPCODE_DP3 */
+/**
+ * DP3 argument fetch: three components from each of the two sources
+ * (args[0..2] = src0.xyz, args[3..5] = src1.xyz).
+ */
+static void
+dp3_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   const unsigned components = 3;
+
+   dp_fetch_args(bld_base, emit_data, components);
+}
+
+/**
+ * DP3: output[chan] = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z.
+ *
+ * Expects args[0..2] = src0.xyz and args[3..5] = src1.xyz.
+ */
+static void
+dp3_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef prod_x, prod_y, prod_z;
+   LLVMValueRef partial;
+
+   /* src0.x * src1.x */
+   prod_x = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[0],
+                                      emit_data->args[3]);
+   /* src0.y * src1.y */
+   prod_y = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[1],
+                                      emit_data->args[4]);
+   partial = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
+                                       prod_y, prod_x);
+   /* src0.z * src1.z */
+   prod_z = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[2],
+                                      emit_data->args[5]);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, partial, prod_z);
+}
+
+/* Default TGSI_OPCODE_DP3 action: dp3_fetch_args paired with dp3_emit.
+ * Installed by lp_set_default_actions(). */
+static struct lp_build_opcode_action dp3_action = {
+ .fetch_args = dp3_fetch_args,
+ .emit = dp3_emit
+};
+
+/* TGSI_OPCODE_DP4 */
+
+/**
+ * DP4 argument fetch: four components from each of the two sources
+ * (args[0..3] = src0.xyzw, args[4..7] = src1.xyzw).
+ */
+static void
+dp4_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   const unsigned components = 4;
+
+   dp_fetch_args(bld_base, emit_data, components);
+}
+
+/**
+ * DP4: output[chan] = sum over x,y,z,w of src0.c * src1.c.
+ *
+ * Expects args[0..3] = src0.xyzw and args[4..7] = src1.xyzw.  Also used
+ * as the emit callback of dph_action, whose fetch replaces args[3].
+ */
+static void
+dp4_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef acc, prod;
+
+   /* src0.x * src1.x */
+   acc = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                   emit_data->args[0],
+                                   emit_data->args[4]);
+   /* + src0.y * src1.y */
+   prod = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                    emit_data->args[1],
+                                    emit_data->args[5]);
+   acc = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, acc, prod);
+   /* + src0.z * src1.z */
+   prod = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                    emit_data->args[2],
+                                    emit_data->args[6]);
+   acc = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, acc, prod);
+   /* + src0.w * src1.w */
+   prod = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                    emit_data->args[3],
+                                    emit_data->args[7]);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, acc, prod);
+}
+
+/* Default TGSI_OPCODE_DP4 action: dp4_fetch_args paired with dp4_emit.
+ * Installed by lp_set_default_actions(). */
+static struct lp_build_opcode_action dp4_action = {
+ .fetch_args = dp4_fetch_args,
+ .emit = dp4_emit
+};
+
+/* TGSI_OPCODE_DPH */
+/**
+ * DPH argument fetch: same layout as DP4, except that the src0.w slot
+ * (args[3]) is overwritten with the constant one, so dp4_emit() ends up
+ * adding src1.w unscaled.
+ */
+static void
+dph_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   const unsigned src0_w_slot = 3;
+
+   dp_fetch_args(bld_base, emit_data, 4);
+   emit_data->args[src0_w_slot] = bld_base->base.one;
+}
+
+/* Default TGSI_OPCODE_DPH action: dph_fetch_args paired with dp4_emit.
+ * NOTE(review): unlike most actions in this file this one is non-static
+ * and const; confirm whether external linkage is intended. */
+const struct lp_build_opcode_action dph_action = {
+ .fetch_args = dph_fetch_args,
+ .emit = dp4_emit
+};
+
+/* TGSI_OPCODE_DST */
+/**
+ * DST argument fetch.
+ *
+ *    args[0] = src0.y
+ *    args[1] = src0.z
+ *    args[2] = src1.y
+ *    args[3] = src1.w
+ *
+ * Neither arg_count nor dst_type is set here.
+ */
+static void
+dst_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   static const unsigned src_of_arg[4] = { 0, 0, 1, 1 };
+   static const unsigned chan_of_arg[4] = {
+      TGSI_CHAN_Y, TGSI_CHAN_Z, TGSI_CHAN_Y, TGSI_CHAN_W
+   };
+   unsigned i;
+
+   for (i = 0; i < 4; ++i) {
+      emit_data->args[i] = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                               src_of_arg[i],
+                                               chan_of_arg[i]);
+   }
+}
+
+/**
+ * DST: writes all four output channels.
+ *
+ *    dst.x = 1
+ *    dst.y = src0.y * src1.y
+ *    dst.z = src0.z
+ *    dst.w = src1.w
+ *
+ * Argument slots are those filled in by dst_fetch_args().
+ */
+static void
+dst_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef src0_y = emit_data->args[0];
+   LLVMValueRef src0_z = emit_data->args[1];
+   LLVMValueRef src1_y = emit_data->args[2];
+   LLVMValueRef src1_w = emit_data->args[3];
+
+   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, src0_y, src1_y);
+   emit_data->output[TGSI_CHAN_Z] = src0_z;
+   emit_data->output[TGSI_CHAN_W] = src1_w;
+}
+
+/* Default TGSI_OPCODE_DST action: dst_fetch_args paired with dst_emit.
+ * Installed by lp_set_default_actions(). */
+static struct lp_build_opcode_action dst_action = {
+ .fetch_args = dst_fetch_args,
+ .emit = dst_emit
+};
+
+/* TGSI_OPCODE_END */
+/**
+ * END: emits no instructions; only sets the context's program counter
+ * (bld_base->pc) to -1.
+ */
+static void
+end_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   (void) action;
+   (void) emit_data;
+
+   bld_base->pc = -1;
+}
+
+/* TGSI_OPCODE_EXP */
+
+/**
+ * EXP (generic): writes all four output channels from args[0] (src0.x).
+ *
+ *    dst.x = 2 ^ floor(src0.x)
+ *    dst.y = src0.x - floor(src0.x)
+ *    dst.z = 2 ^ src0.x
+ *    dst.w = 1
+ *
+ * Built from the FLR, EX2 and SUB actions.
+ */
+static void
+exp_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef x = emit_data->args[0];
+   LLVMValueRef x_floor;
+
+   x_floor = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, x);
+
+   emit_data->output[TGSI_CHAN_X] =
+      lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2, x_floor);
+
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, x, x_floor);
+
+   emit_data->output[TGSI_CHAN_Z] =
+      lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2, x);
+
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Default TGSI_OPCODE_EXP action: scalar_unary_fetch_args paired with
+ * exp_emit.  Installed by lp_set_default_actions(). */
+const struct lp_build_opcode_action exp_action = {
+ .fetch_args = scalar_unary_fetch_args,
+ .emit = exp_emit
+};
+
+/* TGSI_OPCODE_FRC */
+
+/**
+ * FRC: output[chan] = args[0] - FLR(args[0]), built from the FLR and SUB
+ * actions.
+ */
+static void
+frc_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef value = emit_data->args[0];
+   LLVMValueRef floored;
+
+   floored = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, value);
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, value, floored);
+}
+
+/* TGSI_OPCODE_KIL */
+
+/**
+ * KIL argument fetch: args[0..3] = src0.x, .y, .z, .w.
+ *
+ * arg_count is set to 4 and dst_type to the LLVM void type, since the
+ * opcode produces no value.
+ */
+static void
+kil_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   static const unsigned channels[4] = {
+      TGSI_CHAN_X, TGSI_CHAN_Y, TGSI_CHAN_Z, TGSI_CHAN_W
+   };
+   unsigned i;
+
+   for (i = 0; i < 4; ++i) {
+      emit_data->args[i] = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                               0, channels[i]);
+   }
+   emit_data->arg_count = 4;
+   emit_data->dst_type =
+      LLVMVoidTypeInContext(bld_base->base.gallivm->context);
+}
+
+/* TGSI_OPCODE_KILP */
+
+/**
+ * KILP argument fetch: fetches no source values and only sets dst_type
+ * to the LLVM void type.
+ */
+static void
+kilp_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   emit_data->dst_type =
+      LLVMVoidTypeInContext(bld_base->base.gallivm->context);
+}
+
+/* TGSI_OPCODE_LIT */
+
+/**
+ * LIT argument fetch.
+ *
+ *    args[0] = src0.x
+ *    args[1] = src0.y
+ *    args[2] = src0.w
+ *
+ * arg_count is set to 3; dst_type is not set here.
+ */
+static void
+lit_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef x, y, w;
+
+   x = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+   y = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
+   w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+
+   emit_data->args[0] = x;
+   emit_data->args[1] = y;
+   emit_data->args[2] = w;
+   emit_data->arg_count = 3;
+}
+
+/**
+ * LIT: writes all four output channels from the arguments gathered by
+ * lit_fetch_args() (args[0] = src0.x, args[1] = src0.y, args[2] = src0.w).
+ *
+ *    dst.x = 1
+ *    dst.y = MAX(src0.x, 0)
+ *    dst.z = CMP(src0.x, 0, POW(MAX(src0.y, 0), src0.w))
+ *    dst.w = 1
+ *
+ * NOTE(review): the CPU CMP action in this file selects its second
+ * operand when the first is < 0, so src0.x == 0 yields the POW result
+ * rather than 0 -- confirm against the TGSI definition of LIT.
+ */
+static void
+lit_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef src_x = emit_data->args[0];
+   LLVMValueRef src_y = emit_data->args[1];
+   LLVMValueRef src_w = emit_data->args[2];
+   LLVMValueRef zero = bld_base->base.zero;
+   LLVMValueRef specular;
+
+   /* dst.x */
+   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
+
+   /* dst.y = max(src0.x, 0) */
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX, src_x, zero);
+
+   /* dst.z: max(src0.y, 0) raised to src0.w, selected on src0.x */
+   specular = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
+                                        src_y, zero);
+   specular = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
+                                        specular, src_w);
+   emit_data->output[TGSI_CHAN_Z] =
+      lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_CMP,
+                                 src_x, zero, specular);
+
+   /* dst.w */
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Default TGSI_OPCODE_LIT action: lit_fetch_args paired with lit_emit.
+ * Installed by lp_set_default_actions(). */
+static struct lp_build_opcode_action lit_action = {
+ .fetch_args = lit_fetch_args,
+ .emit = lit_emit
+};
+
+/* TGSI_OPCODE_LOG */
+
+/**
+ * LOG (generic): writes all four output channels from args[0] (src0.x).
+ *
+ *    dst.x = floor(lg2(abs(src0.x)))
+ *    dst.y = abs(src0.x) / 2 ^ floor(lg2(abs(src0.x)))
+ *    dst.z = lg2(abs(src0.x))
+ *    dst.w = 1
+ *
+ * Built from the ABS, LG2, FLR, EX2 and DIV actions.
+ */
+static void
+log_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef magnitude, lg2_mag, lg2_floor, pow2_floor;
+
+   magnitude = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
+                                        emit_data->args[0]);
+   lg2_mag = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2, magnitude);
+   lg2_floor = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, lg2_mag);
+
+   /* dst.x */
+   emit_data->output[TGSI_CHAN_X] = lg2_floor;
+
+   /* dst.y */
+   pow2_floor = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2,
+                                         lg2_floor);
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
+                                magnitude, pow2_floor);
+
+   /* dst.z */
+   emit_data->output[TGSI_CHAN_Z] = lg2_mag;
+
+   /* dst.w */
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Default TGSI_OPCODE_LOG action: scalar_unary_fetch_args paired with
+ * log_emit.  Installed by lp_set_default_actions(). */
+static struct lp_build_opcode_action log_action = {
+ .fetch_args = scalar_unary_fetch_args,
+ .emit = log_emit
+};
+
+/* TGSI_OPCODE_LRP */
+
+/**
+ * LRP: output[chan] = MAD(args[0], args[1] - args[2], args[2]), built
+ * from the SUB and MAD actions.
+ */
+static void
+lrp_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef weight = emit_data->args[0];
+   LLVMValueRef base = emit_data->args[2];
+   LLVMValueRef delta;
+
+   delta = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB,
+                                     emit_data->args[1], base);
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
+                                 weight, delta, base);
+}
+
+/* TGSI_OPCODE_MAD */
+
+/**
+ * MAD: output[chan] = args[0] * args[1] + args[2], built from the MUL and
+ * ADD actions.
+ */
+static void
+mad_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef product;
+
+   product = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                       emit_data->args[0],
+                                       emit_data->args[1]);
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
+                                product, emit_data->args[2]);
+}
+
+/* TGSI_OPCODE_MOV */
+
+/**
+ * MOV: copies args[0] to output[chan] without emitting any instruction.
+ */
+static void
+mov_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef value = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] = value;
+}
+
+/* TGSI_OPCODE_MUL */
+/**
+ * MUL: output[chan] = lp_build_mul(args[0], args[1]) on the base build
+ * context.
+ */
+static void
+mul_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_mul(&bld_base->base, lhs, rhs);
+}
+
+/* TGSI_OPCODE_POW */
+
+/**
+ * POW: output[chan] = lp_build_pow(args[0], args[1]) on the base build
+ * context.
+ */
+static void
+pow_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef base = emit_data->args[0];
+   LLVMValueRef exponent = emit_data->args[1];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_pow(&bld_base->base, base, exponent);
+}
+
+/* Default TGSI_OPCODE_POW action: scalar_binary_fetch_args (src0.x and
+ * src1.x) paired with pow_emit.  Installed by lp_set_default_actions(). */
+static struct lp_build_opcode_action pow_action = {
+ .fetch_args = scalar_binary_fetch_args,
+ .emit = pow_emit
+};
+
+/* TGSI_OPCODE_RSQ */
+
+/**
+ * RSQ: replaces args[0] with ABS(args[0]) and then delegates to the
+ * context's rsq_action.emit callback.
+ *
+ * If the context provides no such callback, output[chan] is set to the
+ * base context's undef value.
+ */
+static void
+rsq_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
+                                                 emit_data->args[0]);
+
+   if (!bld_base->rsq_action.emit) {
+      emit_data->output[emit_data->chan] = bld_base->base.undef;
+      return;
+   }
+
+   bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
+}
+
+/* Default TGSI_OPCODE_RSQ action: scalar_unary_fetch_args paired with
+ * rsq_emit.  Installed by lp_set_default_actions(). */
+const struct lp_build_opcode_action rsq_action = {
+ .fetch_args = scalar_unary_fetch_args,
+ .emit = rsq_emit
+
+};
+
+/* TGSI_OPCODE_SCS */
+/**
+ * SCS: writes all four output channels from args[0].
+ *
+ *    dst.x = COS(args[0])
+ *    dst.y = SIN(args[0])
+ *    dst.z = 0
+ *    dst.w = 1
+ */
+static void
+scs_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef angle = emit_data->args[0];
+
+   emit_data->output[TGSI_CHAN_X] =
+      lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_COS, angle);
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SIN, angle);
+   emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Default TGSI_OPCODE_SCS action: scalar_unary_fetch_args paired with
+ * scs_emit.  Installed by lp_set_default_actions(). */
+const struct lp_build_opcode_action scs_action = {
+ .fetch_args = scalar_unary_fetch_args,
+ .emit = scs_emit
+};
+
+/* TGSI_OPCODE_SFL */
+
+/**
+ * SFL: output[chan] is unconditionally the base context's zero constant.
+ */
+static void
+sfl_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef zero = bld_base->base.zero;
+
+   emit_data->output[emit_data->chan] = zero;
+}
+
+/* TGSI_OPCODE_STR */
+
+/**
+ * STR: output[chan] is unconditionally the base context's one constant.
+ */
+static void
+str_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef one = bld_base->base.one;
+
+   emit_data->output[emit_data->chan] = one;
+}
+
+/* TGSI_OPCODE_SUB */
+/**
+ * Generic SUB: emits an LLVM FSub of args[0] and args[1] and stores the
+ * result in output[chan].
+ */
+static void
+sub_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+   LLVMValueRef difference;
+
+   difference = LLVMBuildFSub(bld_base->base.gallivm->builder,
+                              lhs, rhs, "");
+   emit_data->output[emit_data->chan] = difference;
+}
+
+/* TGSI_OPCODE_XPD */
+
+/**
+ * XPD argument fetch: three components from each of the two sources
+ * (args[0..2] = src0.xyz, args[3..5] = src1.xyz).
+ */
+static void
+xpd_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   const unsigned components = 3;
+
+   dp_fetch_args(bld_base, emit_data, components);
+}
+
+/**
+ * Build one cross-product term: (a * b) - (c * d).
+ *
+ * Both products are emitted through the MUL action (a*b first) and the
+ * difference through the SUB action; the SUB result is returned.
+ */
+static LLVMValueRef
+xpd_helper(
+   struct lp_build_tgsi_context *bld_base,
+   LLVMValueRef a,
+   LLVMValueRef b,
+   LLVMValueRef c,
+   LLVMValueRef d)
+{
+   LLVMValueRef ab = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                               a, b);
+   LLVMValueRef cd = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                               c, d);
+
+   return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, ab, cd);
+}
+
+/**
+ * XPD: cross product of src0.xyz and src1.xyz; writes all four channels.
+ *
+ *    dst.x = src0.y * src1.z - src1.y * src0.z
+ *    dst.y = src0.z * src1.x - src1.z * src0.x
+ *    dst.z = src0.x * src1.y - src1.x * src0.y
+ *    dst.w = 1
+ *
+ * Expects args[0..2] = src0.xyz and args[3..5] = src1.xyz.
+ */
+static void
+xpd_emit(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef a_x = emit_data->args[0];
+   LLVMValueRef a_y = emit_data->args[1];
+   LLVMValueRef a_z = emit_data->args[2];
+   LLVMValueRef b_x = emit_data->args[3];
+   LLVMValueRef b_y = emit_data->args[4];
+   LLVMValueRef b_z = emit_data->args[5];
+
+   emit_data->output[TGSI_CHAN_X] =
+      xpd_helper(bld_base, a_y, b_z, b_y, a_z);
+   emit_data->output[TGSI_CHAN_Y] =
+      xpd_helper(bld_base, a_z, b_x, b_z, a_x);
+   emit_data->output[TGSI_CHAN_Z] =
+      xpd_helper(bld_base, a_x, b_y, b_x, a_y);
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Default TGSI_OPCODE_XPD action: xpd_fetch_args paired with xpd_emit.
+ * Installed by lp_set_default_actions(). */
+const struct lp_build_opcode_action xpd_action = {
+ .fetch_args = xpd_fetch_args,
+ .emit = xpd_emit
+};
+
+/**
+ * Install the generic default actions into bld_base->op_actions.
+ *
+ * Three groups are set up:
+ *  - opcodes whose whole action (fetch_args and emit) is replaced by one
+ *    of the action tables defined in this file;
+ *  - opcodes that only get a custom fetch_args callback;
+ *  - opcodes that only get a generic emit callback.
+ * Fields not named here are left as the caller initialized them.
+ */
+void
+lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
+{
+   struct lp_build_opcode_action *actions = bld_base->op_actions;
+
+   /* Complete actions. */
+   actions[TGSI_OPCODE_DP2] = dp2_action;
+   actions[TGSI_OPCODE_DP3] = dp3_action;
+   actions[TGSI_OPCODE_DP4] = dp4_action;
+   actions[TGSI_OPCODE_DP2A] = dp2a_action;
+   actions[TGSI_OPCODE_DPH] = dph_action;
+   actions[TGSI_OPCODE_DST] = dst_action;
+   actions[TGSI_OPCODE_EXP] = exp_action;
+   actions[TGSI_OPCODE_LIT] = lit_action;
+   actions[TGSI_OPCODE_LOG] = log_action;
+   actions[TGSI_OPCODE_RSQ] = rsq_action;
+   actions[TGSI_OPCODE_POW] = pow_action;
+   actions[TGSI_OPCODE_SCS] = scs_action;
+   actions[TGSI_OPCODE_XPD] = xpd_action;
+
+   /* Argument fetch only. */
+   actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
+   actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
+   actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
+   actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args;
+   actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args;
+   actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
+   actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
+   actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
+
+   /* Emit only. */
+   actions[TGSI_OPCODE_ADD].emit = add_emit;
+   actions[TGSI_OPCODE_ARR].emit = arr_emit;
+   actions[TGSI_OPCODE_CLAMP].emit = clamp_emit;
+   actions[TGSI_OPCODE_END].emit = end_emit;
+   actions[TGSI_OPCODE_FRC].emit = frc_emit;
+   actions[TGSI_OPCODE_LRP].emit = lrp_emit;
+   actions[TGSI_OPCODE_MAD].emit = mad_emit;
+   actions[TGSI_OPCODE_MOV].emit = mov_emit;
+   actions[TGSI_OPCODE_MUL].emit = mul_emit;
+   actions[TGSI_OPCODE_SFL].emit = sfl_emit;
+   actions[TGSI_OPCODE_STR].emit = str_emit;
+   actions[TGSI_OPCODE_SUB].emit = sub_emit;
+}
+
+/* CPU Only default actions */
+
+/* These actions are CPU only, because they could potentially output SSE
+ * intrinsics.
+ */
+
+/* TGSI_OPCODE_ABS (CPU Only)*/
+
+/**
+ * ABS (CPU): output[chan] = lp_build_abs(args[0]).
+ */
+static void
+abs_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef value = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_abs(&bld_base->base, value);
+}
+
+/* TGSI_OPCODE_ADD (CPU Only) */
+/**
+ * ADD (CPU): output[chan] = lp_build_add(args[0], args[1]).
+ */
+static void
+add_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_add(&bld_base->base, lhs, rhs);
+}
+
+/* TGSI_OPCODE_CEIL (CPU Only) */
+/**
+ * CEIL (CPU): output[chan] = lp_build_ceil(args[0]).
+ *
+ * This must round towards positive infinity.  Truncation (rounding
+ * towards zero) is not a substitute: it gives the wrong result for every
+ * positive non-integer input, e.g. 1.5 would yield 1 instead of 2.
+ */
+static void
+ceil_emit_cpu(
+   const struct lp_build_opcode_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_ceil(&bld_base->base,
+                                                      emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_CMP (CPU Only) */
+/**
+ * CMP (CPU): compares args[0] against zero with PIPE_FUNC_LESS and uses
+ * the result to select between args[1] (condition true) and args[2].
+ */
+static void
+cmp_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef is_negative;
+   LLVMValueRef selected;
+
+   is_negative = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
+                              emit_data->args[0], bld_base->base.zero);
+   selected = lp_build_select(&bld_base->base, is_negative,
+                              emit_data->args[1], emit_data->args[2]);
+   emit_data->output[emit_data->chan] = selected;
+}
+
+/* TGSI_OPCODE_CND (CPU Only) */
+/**
+ * CND (CPU): compares args[2] against the constant 0.5 with
+ * PIPE_FUNC_GREATER and uses the result to select between args[0]
+ * (condition true) and args[1].
+ */
+static void
+cnd_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef threshold;
+   LLVMValueRef above;
+
+   threshold = lp_build_const_vec(bld_base->base.gallivm,
+                                  bld_base->base.type, 0.5);
+   above = lp_build_cmp(&bld_base->base, PIPE_FUNC_GREATER,
+                        emit_data->args[2], threshold);
+   emit_data->output[emit_data->chan] =
+      lp_build_select(&bld_base->base, above,
+                      emit_data->args[0], emit_data->args[1]);
+}
+
+/* TGSI_OPCODE_COS (CPU Only) */
+/**
+ * COS (CPU): output[chan] = lp_build_cos(args[0]).
+ */
+static void
+cos_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef angle = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_cos(&bld_base->base, angle);
+}
+
+/* TGSI_OPCODE_DIV (CPU Only) */
+/**
+ * DIV (CPU): output[chan] = lp_build_div(args[0], args[1]).
+ */
+static void
+div_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef numerator = emit_data->args[0];
+   LLVMValueRef denominator = emit_data->args[1];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_div(&bld_base->base, numerator, denominator);
+}
+
+/* TGSI_OPCODE_EX2 (CPU Only) */
+/**
+ * EX2 (CPU): output[chan] = lp_build_exp2(args[0]).
+ */
+static void
+ex2_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef exponent = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_exp2(&bld_base->base, exponent);
+}
+
+/* TGSI_OPCODE_EXP (CPU Only) */
+/**
+ * EXP (CPU): lets lp_build_exp2_approx() fill the x, y and z output
+ * channels directly from args[0], then sets the w channel to one.
+ */
+static void
+exp_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef *out = emit_data->output;
+
+   lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
+                        &out[TGSI_CHAN_X],
+                        &out[TGSI_CHAN_Y],
+                        &out[TGSI_CHAN_Z]);
+   out[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* TGSI_OPCODE_FLR (CPU Only) */
+
+/**
+ * FLR (CPU): output[chan] = lp_build_floor(args[0]).
+ */
+static void
+flr_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef value = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_floor(&bld_base->base, value);
+}
+
+/* TGSI_OPCODE_LG2 (CPU Only) */
+/**
+ * LG2 (CPU): output[chan] = lp_build_log2(args[0]).
+ */
+static void
+lg2_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef value = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_log2(&bld_base->base, value);
+}
+
+/* TGSI_OPCODE_LOG (CPU Only) */
+/**
+ * LOG (CPU): writes all four output channels using
+ * lp_build_log2_approx() on args[0] (src0.x).
+ *
+ *    dst.x = floor-log2 value returned by lp_build_log2_approx()
+ *    dst.y = src0.x / exp value returned by lp_build_log2_approx()
+ *    dst.z = log2 value returned by lp_build_log2_approx()
+ *    dst.w = 1
+ *
+ * NOTE(review): unlike the generic log_emit(), no explicit ABS is applied
+ * to src0.x here, so dst.y divides the signed input -- confirm that this
+ * is intended and how lp_build_log2_approx() treats negative inputs.
+ */
+static void
+log_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef x = emit_data->args[0];
+   LLVMValueRef exp_part;
+   LLVMValueRef floor_log2;
+   LLVMValueRef full_log2;
+
+   lp_build_log2_approx(&bld_base->base, x,
+                        &exp_part, &floor_log2, &full_log2);
+
+   emit_data->output[TGSI_CHAN_X] = floor_log2;
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV, x, exp_part);
+   emit_data->output[TGSI_CHAN_Z] = full_log2;
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* TGSI_OPCODE_MAX (CPU Only) */
+
+/**
+ * MAX (CPU): output[chan] = lp_build_max(args[0], args[1]).
+ */
+static void
+max_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_max(&bld_base->base, lhs, rhs);
+}
+
+/* TGSI_OPCODE_MIN (CPU Only) */
+/**
+ * MIN (CPU): output[chan] = lp_build_min(args[0], args[1]).
+ */
+static void
+min_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_min(&bld_base->base, lhs, rhs);
+}
+
+/* TGSI_OPCODE_POW (CPU Only) */
+/**
+ * POW (CPU): output[chan] = lp_build_pow(args[0], args[1]).
+ */
+static void
+pow_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef base = emit_data->args[0];
+   LLVMValueRef exponent = emit_data->args[1];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_pow(&bld_base->base, base, exponent);
+}
+
+
+/* TGSI_OPCODE_RCP (CPU Only) */
+
+/**
+ * RCP (CPU): output[chan] = lp_build_rcp(args[0]).
+ */
+static void
+rcp_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef value = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_rcp(&bld_base->base, value);
+}
+
+/* Reciprocal square root (CPU Only) */
+
+/* This is not the same as TGSI_OPCODE_RSQ, which requires the argument to be
+ * greater than or equal to 0 */
+/**
+ * output[chan] = lp_build_rsqrt(args[0]); the argument is passed through
+ * unchanged (no ABS is applied here, in contrast to rsq_emit()).
+ */
+static void
+recip_sqrt_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef value = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_rsqrt(&bld_base->base, value);
+}
+
+/* TGSI_OPCODE_ROUND (CPU Only) */
+/**
+ * ROUND (CPU): output[chan] = lp_build_round(args[0]).
+ */
+static void
+round_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   LLVMValueRef value = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_round(&bld_base->base, value);
+}
+
+/* TGSI_OPCODE_SET Helper (CPU Only) */
+
+/**
+ * Shared helper for the "set on comparison" opcodes (CPU).
+ *
+ * Compares args[0] with args[1] using the given pipe_func and stores in
+ * output[chan] the base context's one where the comparison selects the
+ * first operand of lp_build_select(), and zero otherwise.
+ *
+ * @param pipe_func  PIPE_FUNC_* comparison passed to lp_build_cmp().
+ */
+static void
+set_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data,
+   unsigned pipe_func)
+{
+   LLVMValueRef mask;
+   LLVMValueRef result;
+
+   mask = lp_build_cmp(&bld_base->base, pipe_func,
+                       emit_data->args[0], emit_data->args[1]);
+   result = lp_build_select(&bld_base->base, mask,
+                            bld_base->base.one, bld_base->base.zero);
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_SEQ (CPU Only) */
+
+/**
+ * SEQ (CPU): set_emit_cpu() with the PIPE_FUNC_EQUAL comparison.
+ */
+static void
+seq_emit_cpu(
+   const struct lp_build_opcode_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   const unsigned func = PIPE_FUNC_EQUAL;
+
+   set_emit_cpu(action, bld_base, emit_data, func);
+}
+
+/* TGSI_OPCODE_SGE (CPU Only): base.one or base.zero from a PIPE_FUNC_GEQUAL compare of args[0], args[1] */
+static void
+sge_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
+}
+
+/* TGSI_OPCODE_SGT (CPU Only): base.one or base.zero from a PIPE_FUNC_GREATER compare of args[0], args[1] */
+
+static void
+sgt_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
+}
+
+/* TGSI_OPCODE_SIN (CPU Only): output[chan] = lp_build_sin(args[0]) */
+static void
+sin_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef const src = emit_data->args[0];
+ emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base, src);
+}
+
+/* TGSI_OPCODE_SLE (CPU Only): base.one or base.zero from a PIPE_FUNC_LEQUAL compare of args[0], args[1] */
+static void
+sle_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
+}
+
+/* TGSI_OPCODE_SLT (CPU Only): base.one or base.zero from a PIPE_FUNC_LESS compare of args[0], args[1] */
+
+static void
+slt_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
+}
+
+/* TGSI_OPCODE_SNE (CPU Only): base.one or base.zero from a PIPE_FUNC_NOTEQUAL compare of args[0], args[1] */
+
+static void
+sne_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
+}
+
+/* TGSI_OPCODE_SSG (CPU Only): output[chan] = lp_build_sgn(args[0]) */
+
+static void
+ssg_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef const src = emit_data->args[0];
+ emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base, src);
+}
+
+/* TGSI_OPCODE_SUB (CPU Only): output[chan] = lp_build_sub(args[0], args[1]) */
+
+static void
+sub_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef * const args = emit_data->args;
+ /* Operands are passed in source order: args[0], then args[1]. */
+ emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base, args[0], args[1]);
+}
+
+/* TGSI_OPCODE_TRUNC (CPU Only): output[chan] = lp_build_trunc(args[0]) */
+
+static void
+trunc_emit_cpu(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef const src = emit_data->args[0];
+ emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base, src);
+}
+
+void
+lp_set_default_actions_cpu(
+ struct lp_build_tgsi_context * bld_base)
+{
+ struct lp_build_opcode_action * const ops = bld_base->op_actions;
+ lp_set_default_actions(bld_base); /* generic actions first; the CPU emitters below are layered on top */
+ ops[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
+ ops[TGSI_OPCODE_ADD].emit = add_emit_cpu;
+ ops[TGSI_OPCODE_ARL].emit = flr_emit_cpu; /* ARL reuses the floor emitter */
+ ops[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
+ ops[TGSI_OPCODE_CND].emit = cnd_emit_cpu;
+ ops[TGSI_OPCODE_COS].emit = cos_emit_cpu;
+ ops[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
+ ops[TGSI_OPCODE_DIV].emit = div_emit_cpu;
+ ops[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
+ ops[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
+ ops[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
+ ops[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
+ ops[TGSI_OPCODE_LOG].emit = log_emit_cpu;
+ ops[TGSI_OPCODE_MAX].emit = max_emit_cpu;
+ ops[TGSI_OPCODE_MIN].emit = min_emit_cpu;
+ ops[TGSI_OPCODE_POW].emit = pow_emit_cpu;
+ ops[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
+ ops[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
+ ops[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
+ ops[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
+ ops[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
+ ops[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
+ ops[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
+ ops[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
+ ops[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
+ ops[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
+ ops[TGSI_OPCODE_SUB].emit = sub_emit_cpu;
+ ops[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
+ bld_base->rsq_action.emit = recip_sqrt_emit_cpu; /* separate action, not a slot in op_actions */
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_action.h b/src/gallium/auxiliary/gallivm/lp_bld_action.h
new file mode 100644
index 0000000..5495a86
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_action.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2010-2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ *
+ * @author Tom Stellard <thomas.stellard at amd.com>
+ *
+ */
+
+
+#ifndef LP_BLD_ACTION_H
+#define LP_BLD_ACTION_H
+
+#include <llvm-c/Core.h>
+
+struct lp_build_tgsi_context;
+
+struct lp_build_emit_data {
+ /** Arguments that are passed to lp_build_opcode_action::emit. The
+ * order of the arguments should be as follows:
+ * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.z, s2.w
+ * AOS: s0.xyzw, s1.xyzw, s2.xyzw
+ * TEXTURE Instructions: coord.xyzw
+ *
+ * Arguments should be packed into the args array. For example an SOA
+ * instruction that reads s0.x and s1.x args should look like this:
+ * args[0] = s0.x;
+ * args[1] = s1.x;
+ */
+ LLVMValueRef args[12];
+
+ /**
+ * Number of arguments in the args array.
+ */
+ unsigned arg_count;
+
+ /**
+ * The output type of the opcode. This should be set in the
+ * lp_build_opcode_action::fetch_args function.
+ */
+ LLVMTypeRef dst_type;
+
+ /** This is used by the lp_build_opcode_action::fetch_args function to
+ * determine which channel to read from the opcode arguments. It also
+ * specifies which index of the output array should be written to by
+ * the lp_build_opcode_action::emit function. However, this value is
+ * usually ignored by any opcodes that are not TGSI_OUTPUT_COMPONENTWISE.
+ */
+ unsigned chan;
+
+ /** The lp_build_opcode_action::emit 'executes' the opcode and writes the
+ * results to this array.
+ */
+ LLVMValueRef output[4];
+
+ /**
+ * The current instruction that is being 'executed', and its opcode info.
+ */
+ const struct tgsi_full_instruction * inst;
+ const struct tgsi_opcode_info * info;
+};
+
+struct lp_build_opcode_action
+{
+
+ /**
+ * This function is responsible for doing 2-3 things:
+ * 1. Fetching the instruction arguments into the emit_data->args array.
+ * 2. Setting the number of arguments in emit_data->arg_count.
+ * 3. Setting the destination type in emit_data->dst_type (usually only
+ * necessary for opcodes that are TGSI_OUTPUT_COMPONENTWISE).
+ */
+ void (*fetch_args)(struct lp_build_tgsi_context *,
+ struct lp_build_emit_data *);
+
+
+ /**
+ * This function is responsible for emitting LLVM IR for a TGSI opcode.
+ * It should store the values it generates in the emit_data->output array
+ * and for TGSI_OUTPUT_COMPONENTWISE and TGSI_OUTPUT_REPLICATE instructions
+ * (and possibly others depending on the specific implementation), it should
+ * make sure to store the values in the array slot indexed by emit_data->chan.
+ */
+ void (*emit)(const struct lp_build_opcode_action *,
+ struct lp_build_tgsi_context *,
+ struct lp_build_emit_data *);
+
+ /**
+ * This variable can be used to store an intrinsic name, in case the TGSI
+ * opcode will be replaced by a target specific intrinsic. (There is a
+ * convenience function in lp_bld_tgsi.c called lp_build_tgsi_intrinsic()
+ * that can be assigned to lp_build_opcode_action::emit and used for
+ * generating intrinsics).
+ */
+ const char * intr_name;
+};
+
+/**
+ * This function initializes the bld_base->op_actions array with some
+ * generic operand actions.
+ */
+void
+lp_set_default_actions(
+ struct lp_build_tgsi_context * bld_base);
+
+/*
+ * This function initializes the bld_base->op_actions array with some
+ * operand actions that are intended only for use when generating
+ * instructions to be executed on a CPU.
+ */
+void
+lp_set_default_actions_cpu(
+ struct lp_build_tgsi_context * bld_base);
+
+#endif /* LP_BLD_ACTION_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
new file mode 100644
index 0000000..9ec4a9b
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -0,0 +1,409 @@
+/**************************************************************************
+ *
+ * Copyright 2010-2011 Advanced Micro Devices, Inc.
+ * Copyright 2010 VMware, Inc.
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "gallivm/lp_bld_tgsi.h"
+
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_gather.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_intr.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "util/u_memory.h"
+
+/* Allocates an empty list of LP_MAX_INSTRUCTIONS entries; returns 1, or 0 on failure. The caller frees bld_base->instructions. */
+unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base)
+{
+ bld_base->num_instructions = 0; /* a fresh list is empty; never append at a stale index */
+ bld_base->instructions = MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
+ if (!bld_base->instructions) {
+ return 0;
+ }
+ bld_base->max_instructions = LP_MAX_INSTRUCTIONS;
+ return 1;
+}
+
+
+unsigned lp_bld_tgsi_add_instruction(
+ struct lp_build_tgsi_context * bld_base,
+ struct tgsi_full_instruction *inst_to_add)
+{
+ /* Append a copy of *inst_to_add; returns 1 on success, 0 if growing fails. */
+ const size_t elem_size = sizeof(struct tgsi_full_instruction);
+
+ if (bld_base->num_instructions == bld_base->max_instructions) {
+ /* Full: grow by another LP_MAX_INSTRUCTIONS entries. */
+ const uint grown = bld_base->max_instructions + LP_MAX_INSTRUCTIONS;
+ struct tgsi_full_instruction *bigger =
+ REALLOC(bld_base->instructions,
+ bld_base->max_instructions * elem_size, grown * elem_size);
+ if (!bigger) {
+ return 0;
+ }
+ bld_base->instructions = bigger;
+ bld_base->max_instructions = grown;
+ }
+
+ bld_base->instructions[bld_base->num_instructions] = *inst_to_add;
+ bld_base->num_instructions++;
+ return 1;
+}
+
+
+/**
+ * Set dst_type from the args (which must already be set): void if there are none, else the type of args[0].
+ */
+static void
+lp_build_action_set_dst_type(
+ struct lp_build_emit_data * emit_data,
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode)
+{
+ if (emit_data->arg_count != 0) {
+ /* XXX: Not all opcodes have the same src and dst types. */
+ emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
+ return;
+ }
+ emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
+}
+
+void
+lp_build_tgsi_intrinsic(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ /* Emit the intrinsic named action->intr_name with the args, result type dst_type. */
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(builder,
+ action->intr_name, emit_data->dst_type, emit_data->args, emit_data->arg_count);
+}
+
+LLVMValueRef
+lp_build_emit_llvm(
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_opcode_action * const op = bld_base->op_actions + tgsi_opcode;
+ /* XXX: Assert that this is a componentwise or replicate instruction */
+ /* Emit actions are expected to write output[chan]; chan 0 pairs with the return below. */
+ emit_data->chan = 0;
+ lp_build_action_set_dst_type(emit_data, bld_base, tgsi_opcode);
+ assert(op->emit);
+ op->emit(op, bld_base, emit_data);
+ return emit_data->output[0];
+}
+
+LLVMValueRef
+lp_build_emit_llvm_unary(
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode,
+ LLVMValueRef arg0)
+{
+ struct lp_build_emit_data emit_data = {{0}}; /* emit() must not see indeterminate inst/info/output[] */
+ emit_data.arg_count = 1;
+ emit_data.args[0] = arg0;
+ return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
+}
+
+LLVMValueRef
+lp_build_emit_llvm_binary(
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode,
+ LLVMValueRef arg0,
+ LLVMValueRef arg1)
+{
+ struct lp_build_emit_data emit_data = {{0}}; /* emit() must not see indeterminate inst/info/output[] */
+ emit_data.arg_count = 2;
+ emit_data.args[0] = arg0;
+ emit_data.args[1] = arg1;
+ return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
+}
+
+LLVMValueRef
+lp_build_emit_llvm_ternary(
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode,
+ LLVMValueRef arg0,
+ LLVMValueRef arg1,
+ LLVMValueRef arg2)
+{
+ struct lp_build_emit_data emit_data = {{0}}; /* emit() must not see indeterminate inst/info/output[] */
+ emit_data.arg_count = 3;
+ emit_data.args[0] = arg0;
+ emit_data.args[1] = arg1;
+ emit_data.args[2] = arg2;
+ return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
+}
+
+/**
+ * The default fetch implementation: one arg per source operand, each read at emit_data->chan.
+ */
+void lp_build_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ unsigned i, count = emit_data->info->num_src;
+ for (i = 0; i < count; i++) {
+ emit_data->args[i] = lp_build_emit_fetch(bld_base, inst, i, emit_data->chan);
+ }
+ emit_data->arg_count = count;
+ /* dst_type becomes the type of args[0], or void when there are no args. */
+ lp_build_action_set_dst_type(emit_data, bld_base, inst->Instruction.Opcode);
+}
+
+/* Advance bld_base->pc and translate one TGSI instruction: fetch args, run the
+ * opcode's emit action and, if there is a dst, call bld_base->emit_store. Returns
+ * FALSE for deprecated or unimplemented opcodes. XXX: assume writemasks are ignored. */
+boolean
+lp_build_tgsi_inst_llvm(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction * inst)
+{
+ unsigned tgsi_opcode = inst->Instruction.Opcode;
+ const struct tgsi_opcode_info * info = tgsi_get_opcode_info(tgsi_opcode);
+ const struct lp_build_opcode_action * action =
+ &bld_base->op_actions[tgsi_opcode];
+ struct lp_build_emit_data emit_data;
+ unsigned chan_index;
+ LLVMValueRef val;
+
+ bld_base->pc++;
+
+ /* Reject deprecated instructions (assert, then return FALSE) */
+ switch (inst->Instruction.Opcode) {
+
+ case TGSI_OPCODE_RCC:
+ case TGSI_OPCODE_UP2H:
+ case TGSI_OPCODE_UP2US:
+ case TGSI_OPCODE_UP4B:
+ case TGSI_OPCODE_UP4UB:
+ case TGSI_OPCODE_X2D:
+ case TGSI_OPCODE_ARA:
+ case TGSI_OPCODE_BRA:
+ case TGSI_OPCODE_DIV:
+ case TGSI_OPCODE_PUSHA:
+ case TGSI_OPCODE_POPA:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_SHL:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_MOD:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_TXF:
+ case TGSI_OPCODE_TXQ:
+ /* deprecated? */
+ assert(0);
+ return FALSE;
+ break;
+ }
+
+ /* Check if the opcode has been implemented */
+ if (!action->emit) {
+ return FALSE;
+ }
+
+ memset(&emit_data, 0, sizeof(emit_data));
+
+ assert(info->num_dst <= 1);
+ if (info->num_dst) {
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_data.output[chan_index] = bld_base->base.undef;
+ }
+ }
+
+ emit_data.inst = inst;
+ emit_data.info = info;
+
+ /* Emit the instructions: once per enabled dst channel for SoA componentwise opcodes, otherwise a single time */
+ if (info->output_type == TGSI_OUTPUT_COMPONENTWISE && bld_base->soa) {
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+ emit_data.chan = chan_index;
+ if (!action->fetch_args) {
+ lp_build_fetch_args(bld_base, &emit_data);
+ } else {
+ action->fetch_args(bld_base, &emit_data);
+ }
+ action->emit(action, bld_base, &emit_data);
+ }
+ } else {
+ emit_data.chan = LP_CHAN_ALL;
+ if (action->fetch_args) {
+ action->fetch_args(bld_base, &emit_data);
+ }
+ /* Make sure the output value is stored in emit_data.output[0], unless
+ * the opcode is channel dependent */
+ if (info->output_type != TGSI_OUTPUT_CHAN_DEPENDENT) {
+ emit_data.chan = 0;
+ }
+ action->emit(action, bld_base, &emit_data);
+
+ /* Replicate the output values */
+ if (info->output_type == TGSI_OUTPUT_REPLICATE && bld_base->soa) {
+ val = emit_data.output[0];
+ memset(emit_data.output, 0, sizeof(emit_data.output));
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+ emit_data.output[chan_index] = val;
+ }
+ }
+ }
+
+ if (info->num_dst > 0) {
+ bld_base->emit_store(bld_base, inst, info, emit_data.output);
+ }
+ return TRUE;
+}
+
+
+LLVMValueRef
+lp_build_emit_fetch(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_instruction *inst,
+ unsigned src_op,
+ const unsigned chan_index)
+{
+ const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+ const boolean all_chans = (chan_index == LP_CHAN_ALL);
+ lp_build_emit_fetch_fn fetch;
+ unsigned swizzle = ~0;
+ LLVMValueRef res;
+
+ /* A single-channel fetch is routed through the register's swizzle before
+ * the read; a whole-register fetch keeps swizzle at ~0 and is swizzled last. */
+ if (!all_chans) {
+ swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index);
+ if (swizzle > 3) {
+ assert(0 && "invalid swizzle in emit_fetch()");
+ return bld_base->base.undef;
+ }
+ }
+
+ assert(reg->Register.Index <= bld_base->info->file_max[reg->Register.File]);
+
+ /* Each register file is read through its own callback; without one the
+ * fetch is rejected and undef is returned. */
+ fetch = bld_base->emit_fetch_funcs[reg->Register.File];
+ if (!fetch) {
+ assert(0 && "invalid src register in emit_fetch()");
+ return bld_base->base.undef;
+ }
+ res = fetch(bld_base, reg, swizzle);
+
+ /* Source modifiers: absolute value first, then negation. */
+ if (reg->Register.Absolute) {
+ res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res);
+ }
+
+ if (reg->Register.Negate) {
+ res = lp_build_negate( &bld_base->base, res );
+ }
+
+ /* Swizzle the argument (whole-register fetches only) */
+ if (all_chans) {
+ res = bld_base->emit_swizzle(bld_base, res,
+ reg->Register.SwizzleX,
+ reg->Register.SwizzleY,
+ reg->Register.SwizzleZ,
+ reg->Register.SwizzleW);
+ }
+
+ return res;
+}
+
+boolean
+lp_build_tgsi_llvm(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_token *tokens)
+{
+ struct tgsi_parse_context parse;
+ boolean ok = TRUE;
+
+ if (bld_base->emit_prologue) {
+ bld_base->emit_prologue(bld_base);
+ }
+
+ if (!lp_bld_tgsi_list_init(bld_base)) {
+ return FALSE;
+ }
+
+ /* Pass 1: emit declarations/immediates and buffer every instruction. */
+ tgsi_parse_init( &parse, tokens );
+ while( ok && !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /* Inputs already interpolated */
+ bld_base->emit_declaration(bld_base, &parse.FullToken.FullDeclaration);
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ /* Growing the list can fail; stop instead of dropping the instruction. */
+ ok = lp_bld_tgsi_add_instruction(bld_base,
+ &parse.FullToken.FullInstruction) != 0;
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ bld_base->emit_immediate(bld_base, &parse.FullToken.FullImmediate);
+ break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ break;
+ default:
+ assert( 0 );
+ }
+ }
+
+ /* Pass 2: translate from bld_base->pc; the loop ends once pc is -1. */
+ while (ok && bld_base->pc != -1) {
+ struct tgsi_full_instruction *instr = bld_base->instructions +
+ bld_base->pc;
+ const struct tgsi_opcode_info *opcode_info =
+ tgsi_get_opcode_info(instr->Instruction.Opcode);
+ if (!lp_build_tgsi_inst_llvm(bld_base, instr)) {
+ _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+ opcode_info->mnemonic);
+ ok = FALSE;
+ }
+ }
+
+ /* Release the parser and the instruction buffer on every exit path. */
+ tgsi_parse_free(&parse);
+ FREE(bld_base->instructions);
+ bld_base->instructions = NULL;
+
+ if (ok && bld_base->emit_epilogue) {
+ bld_base->emit_epilogue(bld_base);
+ }
+
+ return ok;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 9713d10..3ca6719 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -1,5 +1,6 @@
/**************************************************************************
*
+ * Copyright 2010-2011 Advanced Micro Devices, Inc.
* Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
@@ -30,21 +31,33 @@
* TGSI to LLVM IR translation.
*
* @author Jose Fonseca <jfonseca at vmware.com>
+ * @author Tom Stellard <thomas.stellard at amd.com>
*/
#ifndef LP_BLD_TGSI_H
#define LP_BLD_TGSI_H
#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_action.h"
+#include "gallivm/lp_bld_limits.h"
+#include "lp_bld_type.h"
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
+#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_scan.h"
+#define LP_CHAN_ALL (~0) /* "all channels" marker; parenthesised so it expands safely inside larger expressions */
+
+#define LP_MAX_INSTRUCTIONS 256
+
+struct tgsi_full_declaration;
+struct tgsi_full_immediate;
+struct tgsi_full_instruction;
+struct tgsi_full_src_register;
+struct tgsi_opcode_info;
struct tgsi_token;
struct tgsi_shader_info;
-struct lp_type;
-struct lp_build_context;
struct lp_build_mask_context;
struct gallivm_state;
@@ -207,4 +220,328 @@ lp_build_system_values_array(struct gallivm_state *gallivm,
LLVMValueRef facing);
+struct lp_exec_mask {
+ struct lp_build_context *bld;
+
+ boolean has_mask;
+
+ LLVMTypeRef int_vec_type;
+
+ LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
+ int cond_stack_size;
+ LLVMValueRef cond_mask;
+
+ LLVMBasicBlockRef loop_block;
+ LLVMValueRef cont_mask;
+ LLVMValueRef break_mask;
+ LLVMValueRef break_var;
+ struct {
+ LLVMBasicBlockRef loop_block;
+ LLVMValueRef cont_mask;
+ LLVMValueRef break_mask;
+ LLVMValueRef break_var;
+ } loop_stack[LP_MAX_TGSI_NESTING];
+ int loop_stack_size;
+
+ LLVMValueRef ret_mask;
+ struct {
+ int pc;
+ LLVMValueRef ret_mask;
+ } call_stack[LP_MAX_TGSI_NESTING];
+ int call_stack_size;
+
+ LLVMValueRef exec_mask;
+};
+
+struct lp_build_tgsi_inst_list
+{
+ struct tgsi_full_instruction *instructions;
+ uint max_instructions;
+ uint num_instructions;
+};
+
+unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base);
+
+
+unsigned lp_bld_tgsi_add_instruction(
+ struct lp_build_tgsi_context * bld_base,
+ struct tgsi_full_instruction *inst_to_add);
+
+
+struct lp_build_tgsi_context;
+
+
+typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *,
+ const struct tgsi_full_src_register *,
+ unsigned);
+
+struct lp_build_tgsi_context
+{
+ struct lp_build_context base;
+
+ /** This array stores functions that are used to transform TGSI opcodes to
+ * LLVM instructions.
+ */
+ struct lp_build_opcode_action op_actions[TGSI_OPCODE_LAST];
+
+ /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
+ * should compute 1 / sqrt (src0.x) */
+ struct lp_build_opcode_action rsq_action;
+
+ const struct tgsi_shader_info *info;
+
+ lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
+
+ LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *,
+ LLVMValueRef, unsigned, unsigned, unsigned, unsigned);
+
+ void (*emit_store)(struct lp_build_tgsi_context *,
+ const struct tgsi_full_instruction *,
+ const struct tgsi_opcode_info *,
+ LLVMValueRef dst[4]);
+
+ void (*emit_declaration)(struct lp_build_tgsi_context *,
+ const struct tgsi_full_declaration *decl);
+
+ void (*emit_immediate)(struct lp_build_tgsi_context *,
+ const struct tgsi_full_immediate *imm);
+
+
+ /* Allow the user to store data in this structure rather than passing it
+ * to every function. */
+ void * userdata;
+
+ boolean soa;
+
+ int pc;
+
+ struct tgsi_full_instruction *instructions;
+ uint max_instructions;
+ uint num_instructions;
+
+ /** This function allows the user to insert some instructions at the
+ * beginning of the program. It is optional and does not need to be
+ * implemented.
+ */
+ void (*emit_prologue)(struct lp_build_tgsi_context*);
+
+ /** This function allows the user to insert some instructions at the end of
+ * the program. This callback is intended to be used for emitting
+ * instructions to handle the export for the output registers, but it can
+ * be used for any purpose. Implementing this function is optional, but
+ * recommended.
+ */
+ void (*emit_epilogue)(struct lp_build_tgsi_context*);
+};
+
+struct lp_build_tgsi_soa_context
+{
+ struct lp_build_tgsi_context bld_base;
+
+ /* Builder for vector integer masks and indices */
+ struct lp_build_context uint_bld;
+
+ /* Builder for scalar elements of shader's data type (float) */
+ struct lp_build_context elem_bld;
+
+ LLVMValueRef consts_ptr;
+ const LLVMValueRef *pos;
+ const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
+ LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
+
+ const struct lp_build_sampler_soa *sampler;
+
+ LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
+ LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS];
+ LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
+ LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
+
+ /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+ * set in the indirect_files field.
+ * The temps[] array above is unused then.
+ */
+ LLVMValueRef temps_array;
+
+ /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
+ * set in the indirect_files field.
+ * The outputs[] array above is unused then.
+ */
+ LLVMValueRef outputs_array;
+
+ /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
+ * set in the indirect_files field.
+ * The inputs[] array above is unused then.
+ */
+ LLVMValueRef inputs_array;
+
+ LLVMValueRef system_values_array;
+
+ /** bitmask indicating which register files are accessed indirectly */
+ unsigned indirect_files;
+
+ struct lp_build_mask_context *mask;
+ struct lp_exec_mask exec_mask;
+
+ uint num_immediates;
+
+};
+
+void
+lp_emit_declaration_soa(
+ struct lp_build_tgsi_context *bld,
+ const struct tgsi_full_declaration *decl);
+
+void lp_emit_immediate_soa(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_immediate *imm);
+
+boolean
+lp_emit_instruction_soa(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ const struct tgsi_opcode_info *info);
+
+
+LLVMValueRef
+lp_get_temp_ptr_soa(
+ struct lp_build_tgsi_soa_context *bld,
+ unsigned index,
+ unsigned chan);
+
+LLVMValueRef
+lp_get_output_ptr(
+ struct lp_build_tgsi_soa_context *bld,
+ unsigned index,
+ unsigned chan);
+
+struct lp_build_tgsi_aos_context
+{
+ struct lp_build_tgsi_context bld_base;
+
+ /* Builder for integer masks and indices */
+ struct lp_build_context int_bld;
+
+ /*
+ * AoS swizzle used:
+ * - swizzles[0] = red index
+ * - swizzles[1] = green index
+ * - swizzles[2] = blue index
+ * - swizzles[3] = alpha index
+ */
+ unsigned char swizzles[4];
+ unsigned char inv_swizzles[4];
+
+ LLVMValueRef consts_ptr;
+ const LLVMValueRef *inputs;
+ LLVMValueRef *outputs;
+
+ struct lp_build_sampler_aos *sampler;
+
+ LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
+ LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
+ LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
+ LLVMValueRef preds[LP_MAX_TGSI_PREDS];
+
+ /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+ * set in the indirect_files field.
+ * The temps[] array above is unused then.
+ */
+ LLVMValueRef temps_array;
+
+ /** bitmask indicating which register files are accessed indirectly */
+ unsigned indirect_files;
+
+};
+
+static INLINE struct lp_build_tgsi_soa_context *
+lp_soa_context(struct lp_build_tgsi_context *bld_base)
+{
+ return (struct lp_build_tgsi_soa_context *)bld_base;
+}
+
+static INLINE struct lp_build_tgsi_aos_context *
+lp_aos_context(struct lp_build_tgsi_context *bld_base)
+{
+ return (struct lp_build_tgsi_aos_context *)bld_base;
+}
+
+void
+lp_emit_declaration_aos(
+ struct lp_build_tgsi_aos_context *bld,
+ const struct tgsi_full_declaration *decl);
+
+
+boolean
+lp_emit_instruction_aos(
+ struct lp_build_tgsi_aos_context *bld,
+ const struct tgsi_full_instruction *inst,
+ const struct tgsi_opcode_info *info,
+ int *pc);
+
+void
+lp_emit_store_aos(
+ struct lp_build_tgsi_aos_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ LLVMValueRef value);
+
+void lp_build_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data);
+
+LLVMValueRef
+lp_build_tgsi_inst_llvm_aos(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction *inst);
+
+void
+lp_build_tgsi_intrinsic(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data);
+
+LLVMValueRef
+lp_build_emit_llvm(
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode,
+ struct lp_build_emit_data * emit_data);
+
+LLVMValueRef
+lp_build_emit_llvm_unary(
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode,
+ LLVMValueRef arg0);
+
+LLVMValueRef
+lp_build_emit_llvm_binary(
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode,
+ LLVMValueRef arg0,
+ LLVMValueRef arg1);
+
+LLVMValueRef
+lp_build_emit_llvm_ternary(
+ struct lp_build_tgsi_context *bld_base,
+ unsigned tgsi_opcode,
+ LLVMValueRef arg0,
+ LLVMValueRef arg1,
+ LLVMValueRef arg2);
+
+boolean
+lp_build_tgsi_inst_llvm(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction *inst);
+
+LLVMValueRef
+lp_build_emit_fetch(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_instruction *inst,
+ unsigned src_op,
+ const unsigned chan_index);
+
+boolean
+lp_build_tgsi_llvm(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_token *tokens);
+
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index a021efd..26cfffe 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -55,61 +55,15 @@
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
-#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
-#define LP_MAX_INSTRUCTIONS 256
-
-
-struct lp_build_tgsi_aos_context
-{
- struct lp_build_context base;
-
- /* Builder for integer masks and indices */
- struct lp_build_context int_bld;
-
- /*
- * AoS swizzle used:
- * - swizzles[0] = red index
- * - swizzles[1] = green index
- * - swizzles[2] = blue index
- * - swizzles[3] = alpha index
- */
- unsigned char swizzles[4];
- unsigned char inv_swizzles[4];
-
- LLVMValueRef consts_ptr;
- const LLVMValueRef *inputs;
- LLVMValueRef *outputs;
-
- struct lp_build_sampler_aos *sampler;
-
- LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
- LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
- LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
- LLVMValueRef preds[LP_MAX_TGSI_PREDS];
-
- /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
- * set in the indirect_files field.
- * The temps[] array above is unused then.
- */
- LLVMValueRef temps_array;
-
- /** bitmask indicating which register files are accessed indirectly */
- unsigned indirect_files;
-
- struct tgsi_full_instruction *instructions;
- uint max_instructions;
-};
-
-
/**
* Wrapper around lp_build_swizzle_aos which translates swizzles to another
* ordering.
*/
static LLVMValueRef
-swizzle_aos(struct lp_build_tgsi_aos_context *bld,
+swizzle_aos(struct lp_build_tgsi_context *bld_base,
LLVMValueRef a,
unsigned swizzle_x,
unsigned swizzle_y,
@@ -117,6 +71,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld,
unsigned swizzle_w)
{
unsigned char swizzles[4];
+ struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
assert(swizzle_x < 4);
assert(swizzle_y < 4);
@@ -128,7 +83,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld,
swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
- return lp_build_swizzle_aos(&bld->base, a, swizzles);
+ return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
}
@@ -138,149 +93,132 @@ swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
unsigned chan)
{
chan = bld->swizzles[chan];
- return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
+ return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
}
-/**
- * Register fetch.
- */
static LLVMValueRef
-emit_fetch(
- struct lp_build_tgsi_aos_context *bld,
- const struct tgsi_full_instruction *inst,
- unsigned src_op)
+emit_fetch_constant(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
- struct lp_type type = bld->base.type;
- const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+ struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct lp_type type = bld_base->base.type;
LLVMValueRef res;
unsigned chan;
assert(!reg->Register.Indirect);
/*
- * Fetch the from the register file.
+ * Get the constants components
*/
- switch (reg->Register.File) {
- case TGSI_FILE_CONSTANT:
- /*
- * Get the constants components
- */
-
- res = bld->base.undef;
- for (chan = 0; chan < 4; ++chan) {
- LLVMValueRef index;
- LLVMValueRef scalar_ptr;
- LLVMValueRef scalar;
- LLVMValueRef swizzle;
-
- index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan);
-
- scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
- &index, 1, "");
+ res = bld->bld_base.base.undef;
+ for (chan = 0; chan < 4; ++chan) {
+ LLVMValueRef index;
+ LLVMValueRef scalar_ptr;
+ LLVMValueRef scalar;
+ LLVMValueRef swizzle;
- scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+ index = lp_build_const_int32(bld->bld_base.base.gallivm,
+ reg->Register.Index * 4 + chan);
- lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
+ scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
- /*
- * NOTE: constants array is always assumed to be RGBA
- */
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
- swizzle = lp_build_const_int32(bld->base.gallivm, chan);
-
- res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
- }
+ lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
/*
- * Broadcast the first quaternion to all others.
- *
- * XXX: could be factored into a reusable function.
+ * NOTE: constants array is always assumed to be RGBA
*/
- if (type.length > 4) {
- LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
- unsigned i;
+ swizzle = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
- for (chan = 0; chan < 4; ++chan) {
- shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan);
- }
-
- for (i = 4; i < type.length; ++i) {
- shuffles[i] = shuffles[i % 4];
- }
+ res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
+ }
- res = LLVMBuildShuffleVector(builder,
- res, bld->base.undef,
- LLVMConstVector(shuffles, type.length),
- "");
- }
- break;
+ /*
+ * Broadcast the first quaternion to all others.
+ *
+ * XXX: could be factored into a reusable function.
+ */
- case TGSI_FILE_IMMEDIATE:
- res = bld->immediates[reg->Register.Index];
- assert(res);
- break;
+ if (type.length > 4) {
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
- case TGSI_FILE_INPUT:
- res = bld->inputs[reg->Register.Index];
- assert(res);
- break;
-
- case TGSI_FILE_TEMPORARY:
- {
- LLVMValueRef temp_ptr;
- temp_ptr = bld->temps[reg->Register.Index];
- res = LLVMBuildLoad(builder, temp_ptr, "");
- if (!res)
- return bld->base.undef;
+ for (chan = 0; chan < 4; ++chan) {
+ shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
}
- break;
- default:
- assert(0 && "invalid src register in emit_fetch()");
- return bld->base.undef;
- }
-
- /*
- * Apply sign modifier.
- */
+ for (i = 4; i < type.length; ++i) {
+ shuffles[i] = shuffles[i % 4];
+ }
- if (reg->Register.Absolute) {
- res = lp_build_abs(&bld->base, res);
+ res = LLVMBuildShuffleVector(builder,
+ res, bld->bld_base.base.undef,
+ LLVMConstVector(shuffles, type.length),
+ "");
}
+ return res;
+}
- if(reg->Register.Negate) {
- res = lp_build_negate(&bld->base, res);
- }
+/**
+ * Fetch callback for TGSI_FILE_IMMEDIATE sources (AoS path).
+ *
+ * Returns the value stored in bld->immediates[] at reg->Register.Index and
+ * asserts that it is non-NULL. The swizzle argument is not used here.
+ */
+static LLVMValueRef
+emit_fetch_immediate(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
+{
+ struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
+ LLVMValueRef res = bld->immediates[reg->Register.Index];
+ assert(res);
+ return res;
+}
- /*
- * Swizzle the argument
- */
+/**
+ * Fetch callback for TGSI_FILE_INPUT sources (AoS path).
+ *
+ * Returns bld->inputs[reg->Register.Index]. Indirect addressing is not
+ * supported (asserted) and the selected entry must be non-NULL (asserted).
+ * The swizzle argument is not used here.
+ */
+static LLVMValueRef
+emit_fetch_input(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
+{
+ struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
+ LLVMValueRef res = bld->inputs[reg->Register.Index];
+ assert(!reg->Register.Indirect);
+ assert(res);
+ return res;
+}
- res = swizzle_aos(bld, res,
- reg->Register.SwizzleX,
- reg->Register.SwizzleY,
- reg->Register.SwizzleZ,
- reg->Register.SwizzleW);
+/**
+ * Fetch callback for TGSI_FILE_TEMPORARY sources (AoS path).
+ *
+ * Loads the whole vector from the storage recorded in bld->temps[] for
+ * reg->Register.Index. Indirect addressing is not supported (asserted).
+ * The swizzle argument is not used here.
+ *
+ * \return the loaded value, or base.undef when no storage has been recorded
+ * for the register.
+ */
+static LLVMValueRef
+emit_fetch_temporary(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
+{
+ struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
+ LLVMValueRef res;
+ assert(!reg->Register.Indirect);
+ /*
+ * Test the pointer rather than the load result: temps[] entries stay NULL
+ * until a declaration fills them in, and a load must not be built from a
+ * NULL pointer operand.
+ */
+ if (!temp_ptr)
+ return bld->bld_base.base.undef;
+ res = LLVMBuildLoad(builder, temp_ptr, "");
return res;
}
-
/**
* Register store.
*/
-static void
-emit_store(
+void
+lp_emit_store_aos(
struct lp_build_tgsi_aos_context *bld,
const struct tgsi_full_instruction *inst,
unsigned index,
LLVMValueRef value)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
LLVMValueRef mask = NULL;
LLVMValueRef ptr;
@@ -294,13 +232,13 @@ emit_store(
break;
case TGSI_SAT_ZERO_ONE:
- value = lp_build_max(&bld->base, value, bld->base.zero);
- value = lp_build_min(&bld->base, value, bld->base.one);
+ value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
+ value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
break;
case TGSI_SAT_MINUS_PLUS_ONE:
- value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
- value = lp_build_min(&bld->base, value, bld->base.one);
+ value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
+ value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
break;
default:
@@ -335,6 +273,8 @@ emit_store(
return;
}
+ if (!ptr)
+ return;
/*
* Predicate
*/
@@ -350,17 +290,17 @@ emit_store(
/*
* Convert the value to an integer mask.
*/
- pred = lp_build_compare(bld->base.gallivm,
- bld->base.type,
+ pred = lp_build_compare(bld->bld_base.base.gallivm,
+ bld->bld_base.base.type,
PIPE_FUNC_NOTEQUAL,
pred,
- bld->base.zero);
+ bld->bld_base.base.zero);
if (inst->Predicate.Negate) {
pred = LLVMBuildNot(builder, pred, "");
}
- pred = swizzle_aos(bld, pred,
+ pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
inst->Predicate.SwizzleX,
inst->Predicate.SwizzleY,
inst->Predicate.SwizzleZ,
@@ -380,7 +320,7 @@ emit_store(
if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
LLVMValueRef writemask;
- writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type,
+ writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type,
reg->Register.WriteMask);
if (mask) {
@@ -394,7 +334,7 @@ emit_store(
LLVMValueRef orig_value;
orig_value = LLVMBuildLoad(builder, ptr, "");
- value = lp_build_select(&bld->base,
+ value = lp_build_select(&bld->bld_base.base,
mask, value, orig_value);
}
@@ -419,44 +359,44 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
if (!bld->sampler) {
_debug_printf("warning: found texture instruction but no sampler generator supplied\n");
- return bld->base.undef;
+ return bld->bld_base.base.undef;
}
target = inst->Texture.Texture;
- coords = emit_fetch( bld, inst, 0 );
+ coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
- ddx = emit_fetch( bld, inst, 1 );
- ddy = emit_fetch( bld, inst, 2 );
+ ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
+ ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
unit = inst->Src[3].Register.Index;
} else {
#if 0
- ddx = lp_build_ddx( &bld->base, coords );
- ddy = lp_build_ddy( &bld->base, coords );
+ ddx = lp_build_ddx( &bld->bld_base.base, coords );
+ ddy = lp_build_ddy( &bld->bld_base.base, coords );
#else
/* TODO */
- ddx = bld->base.one;
- ddy = bld->base.one;
+ ddx = bld->bld_base.base.one;
+ ddy = bld->bld_base.base.one;
#endif
unit = inst->Src[1].Register.Index;
}
return bld->sampler->emit_fetch_texel(bld->sampler,
- &bld->base,
+ &bld->bld_base.base,
target, unit,
coords, ddx, ddy,
modifier);
}
-static void
-emit_declaration(
+void
+lp_emit_declaration_aos(
struct lp_build_tgsi_aos_context *bld,
const struct tgsi_full_declaration *decl)
{
- struct gallivm_state *gallivm = bld->base.gallivm;
- LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type);
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
unsigned first = decl->Range.First;
unsigned last = decl->Range.Last;
@@ -468,7 +408,7 @@ emit_declaration(
assert(idx < LP_MAX_TGSI_TEMPS);
if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
- bld->temps_array = lp_build_array_alloca(bld->base.gallivm,
+ bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
vec_type, array_size, "");
} else {
bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
@@ -501,8 +441,8 @@ emit_declaration(
* Emit LLVM for one TGSI instruction.
* \param return TRUE for success, FALSE otherwise
*/
-static boolean
-emit_instruction(
+boolean
+lp_emit_instruction_aos(
struct lp_build_tgsi_aos_context *bld,
const struct tgsi_full_instruction *inst,
const struct tgsi_opcode_info *info,
@@ -527,17 +467,17 @@ emit_instruction(
assert(info->num_dst <= 1);
if (info->num_dst) {
- dst0 = bld->base.undef;
+ dst0 = bld->bld_base.base.undef;
}
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- src0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_floor(&bld->base, src0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ dst0 = lp_build_floor(&bld->bld_base.base, src0);
break;
case TGSI_OPCODE_MOV:
- dst0 = emit_fetch(bld, inst, 0);
+ dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
break;
case TGSI_OPCODE_LIT:
@@ -545,15 +485,15 @@ emit_instruction(
case TGSI_OPCODE_RCP:
/* TGSI_OPCODE_RECIP */
- src0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_rcp(&bld->base, src0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ dst0 = lp_build_rcp(&bld->bld_base.base, src0);
break;
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
- src0 = emit_fetch(bld, inst, 0);
- tmp0 = lp_build_abs(&bld->base, src0);
- dst0 = lp_build_rsqrt(&bld->base, tmp0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
+ dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
break;
case TGSI_OPCODE_EXP:
@@ -563,15 +503,15 @@ emit_instruction(
return FALSE;
case TGSI_OPCODE_MUL:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- dst0 = lp_build_mul(&bld->base, src0, src1);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
break;
case TGSI_OPCODE_ADD:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- dst0 = lp_build_add(&bld->base, src0, src1);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
break;
case TGSI_OPCODE_DP3:
@@ -586,121 +526,116 @@ emit_instruction(
return FALSE;
case TGSI_OPCODE_MIN:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- dst0 = lp_build_max(&bld->base, src0, src1);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ /* MIN selects the smaller operand; the pre-patch code called lp_build_max here. */
+ dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
break;
case TGSI_OPCODE_MAX:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- dst0 = lp_build_max(&bld->base, src0, src1);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
break;
case TGSI_OPCODE_SLT:
/* TGSI_OPCODE_SETLT */
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
- dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
+ dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
break;
case TGSI_OPCODE_SGE:
/* TGSI_OPCODE_SETGE */
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
- dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
+ dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
break;
case TGSI_OPCODE_MAD:
/* TGSI_OPCODE_MADD */
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- src2 = emit_fetch(bld, inst, 2);
- tmp0 = lp_build_mul(&bld->base, src0, src1);
- dst0 = lp_build_add(&bld->base, tmp0, src2);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+ tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
+ dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
break;
case TGSI_OPCODE_SUB:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- dst0 = lp_build_sub(&bld->base, src0, src1);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
break;
case TGSI_OPCODE_LRP:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- src2 = emit_fetch(bld, inst, 2);
- tmp0 = lp_build_sub(&bld->base, src1, src2);
- tmp0 = lp_build_mul(&bld->base, src0, tmp0);
- dst0 = lp_build_add(&bld->base, tmp0, src2);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+ tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
+ tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
+ dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
break;
case TGSI_OPCODE_CND:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- src2 = emit_fetch(bld, inst, 2);
- tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
- dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+ tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
+ tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
+ dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
break;
case TGSI_OPCODE_DP2A:
return FALSE;
case TGSI_OPCODE_FRC:
- src0 = emit_fetch(bld, inst, 0);
- tmp0 = lp_build_floor(&bld->base, src0);
- dst0 = lp_build_sub(&bld->base, src0, tmp0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ tmp0 = lp_build_floor(&bld->bld_base.base, src0);
+ dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
break;
case TGSI_OPCODE_CLAMP:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- src2 = emit_fetch(bld, inst, 2);
- tmp0 = lp_build_max(&bld->base, src0, src1);
- dst0 = lp_build_min(&bld->base, tmp0, src2);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+ tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
+ dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
break;
case TGSI_OPCODE_FLR:
- src0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_floor(&bld->base, src0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ dst0 = lp_build_floor(&bld->bld_base.base, src0);
break;
case TGSI_OPCODE_ROUND:
- src0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_round(&bld->base, src0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ dst0 = lp_build_round(&bld->bld_base.base, src0);
break;
case TGSI_OPCODE_EX2:
- src0 = emit_fetch(bld, inst, 0);
- tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
- dst0 = lp_build_exp2(&bld->base, tmp0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
+ dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
break;
case TGSI_OPCODE_LG2:
- src0 = emit_fetch(bld, inst, 0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
- dst0 = lp_build_log2(&bld->base, tmp0);
+ dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
break;
case TGSI_OPCODE_POW:
- src0 = emit_fetch(bld, inst, 0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
- src1 = emit_fetch(bld, inst, 1);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
- dst0 = lp_build_pow(&bld->base, src0, src1);
+ dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
break;
case TGSI_OPCODE_XPD:
return FALSE;
- case TGSI_OPCODE_ABS:
- src0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_abs(&bld->base, src0);
- break;
-
case TGSI_OPCODE_RCC:
/* deprecated? */
assert(0);
@@ -710,9 +645,9 @@ emit_instruction(
return FALSE;
case TGSI_OPCODE_COS:
- src0 = emit_fetch(bld, inst, 0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
- dst0 = lp_build_cos(&bld->base, tmp0);
+ dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
break;
case TGSI_OPCODE_DDX:
@@ -748,45 +683,45 @@ emit_instruction(
return FALSE;
case TGSI_OPCODE_SEQ:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
- dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
+ dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
break;
case TGSI_OPCODE_SFL:
- dst0 = bld->base.zero;
+ dst0 = bld->bld_base.base.zero;
break;
case TGSI_OPCODE_SGT:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
- dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
+ dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
break;
case TGSI_OPCODE_SIN:
- src0 = emit_fetch(bld, inst, 0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
- dst0 = lp_build_sin(&bld->base, tmp0);
+ dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
break;
case TGSI_OPCODE_SLE:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
- dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
+ dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
break;
case TGSI_OPCODE_SNE:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
- dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
+ dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
break;
case TGSI_OPCODE_STR:
- dst0 = bld->base.one;
+ dst0 = bld->bld_base.base.one;
break;
case TGSI_OPCODE_TEX:
@@ -834,8 +769,8 @@ emit_instruction(
break;
case TGSI_OPCODE_ARR:
- src0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_round(&bld->base, src0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ dst0 = lp_build_round(&bld->bld_base.base, src0);
break;
case TGSI_OPCODE_BRA:
@@ -856,16 +791,16 @@ emit_instruction(
case TGSI_OPCODE_SSG:
/* TGSI_OPCODE_SGN */
- tmp0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_sgn(&bld->base, tmp0);
+ tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
break;
case TGSI_OPCODE_CMP:
- src0 = emit_fetch(bld, inst, 0);
- src1 = emit_fetch(bld, inst, 1);
- src2 = emit_fetch(bld, inst, 2);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
- dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+ src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+ tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
+ dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
break;
case TGSI_OPCODE_SCS:
@@ -934,8 +869,8 @@ emit_instruction(
break;
case TGSI_OPCODE_CEIL:
- src0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_ceil(&bld->base, src0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ dst0 = lp_build_ceil(&bld->bld_base.base, src0);
break;
case TGSI_OPCODE_I2F:
@@ -951,8 +886,8 @@ emit_instruction(
break;
case TGSI_OPCODE_TRUNC:
- src0 = emit_fetch(bld, inst, 0);
- dst0 = lp_build_trunc(&bld->base, src0);
+ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+ dst0 = lp_build_trunc(&bld->bld_base.base, src0);
break;
case TGSI_OPCODE_SHL:
@@ -1028,7 +963,7 @@ emit_instruction(
}
if (info->num_dst) {
- emit_store(bld, inst, 0, dst0);
+ lp_emit_store_aos(bld, inst, 0, dst0);
}
return TRUE;
@@ -1049,13 +984,12 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
struct lp_build_tgsi_aos_context bld;
struct tgsi_parse_context parse;
uint num_immediates = 0;
- uint num_instructions = 0;
unsigned chan;
int pc = 0;
/* Setup build context */
memset(&bld, 0, sizeof bld);
- lp_build_context_init(&bld.base, gallivm, type);
+ lp_build_context_init(&bld.bld_base.base, gallivm, type);
lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
for (chan = 0; chan < 4; ++chan) {
@@ -1068,11 +1002,17 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
bld.consts_ptr = consts_ptr;
bld.sampler = sampler;
bld.indirect_files = info->indirect_files;
- bld.instructions = (struct tgsi_full_instruction *)
- MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
- bld.max_instructions = LP_MAX_INSTRUCTIONS;
+ bld.bld_base.emit_swizzle = swizzle_aos;
+
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
- if (!bld.instructions) {
+ /* Set opcode actions */
+ lp_set_default_actions_cpu(&bld.bld_base);
+
+ if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
return;
}
@@ -1084,33 +1024,13 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
switch(parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_DECLARATION:
/* Inputs already interpolated */
- emit_declaration(&bld, &parse.FullToken.FullDeclaration);
+ lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- {
- /* save expanded instruction */
- if (num_instructions == bld.max_instructions) {
- struct tgsi_full_instruction *instructions;
- instructions = REALLOC(bld.instructions,
- bld.max_instructions
- * sizeof(struct tgsi_full_instruction),
- (bld.max_instructions + LP_MAX_INSTRUCTIONS)
- * sizeof(struct tgsi_full_instruction));
- if (!instructions) {
- break;
- }
- bld.instructions = instructions;
- bld.max_instructions += LP_MAX_INSTRUCTIONS;
- }
-
- memcpy(bld.instructions + num_instructions,
- &parse.FullToken.FullInstruction,
- sizeof(bld.instructions[0]));
-
- num_instructions++;
- }
-
+ /* save expanded instruction */
+ lp_bld_tgsi_add_instruction(&bld.bld_base,
+ &parse.FullToken.FullInstruction);
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
@@ -1144,10 +1064,10 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
}
while (pc != -1) {
- struct tgsi_full_instruction *instr = bld.instructions + pc;
+ struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
const struct tgsi_opcode_info *opcode_info =
tgsi_get_opcode_info(instr->Instruction.Opcode);
- if (!emit_instruction(&bld, instr, opcode_info, &pc))
+ if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
_debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
opcode_info->mnemonic);
}
@@ -1168,6 +1088,7 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
LLVMDumpModule(module);
}
- FREE(bld.instructions);
+ /* The instruction list now lives in bld_base; presumably still owned here (confirm against lp_bld_tgsi_list_init). */
+ FREE(bld.bld_base.instructions);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 1ad0b74..f7e15db 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -47,6 +47,7 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
+#include "lp_bld_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
@@ -63,97 +64,6 @@
#include "lp_bld_printf.h"
-#define NUM_CHANNELS 4
-
-#define LP_MAX_INSTRUCTIONS 256
-
-
-struct lp_exec_mask {
- struct lp_build_context *bld;
-
- boolean has_mask;
-
- LLVMTypeRef int_vec_type;
-
- LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
- int cond_stack_size;
- LLVMValueRef cond_mask;
-
- LLVMBasicBlockRef loop_block;
- LLVMValueRef cont_mask;
- LLVMValueRef break_mask;
- LLVMValueRef break_var;
- struct {
- LLVMBasicBlockRef loop_block;
- LLVMValueRef cont_mask;
- LLVMValueRef break_mask;
- LLVMValueRef break_var;
- } loop_stack[LP_MAX_TGSI_NESTING];
- int loop_stack_size;
-
- LLVMValueRef ret_mask;
- struct {
- int pc;
- LLVMValueRef ret_mask;
- } call_stack[LP_MAX_TGSI_NESTING];
- int call_stack_size;
-
- LLVMValueRef exec_mask;
-};
-
-struct lp_build_tgsi_soa_context
-{
- struct lp_build_context base;
-
- /* Builder for vector integer masks and indices */
- struct lp_build_context uint_bld;
-
- /* Builder for scalar elements of shader's data type (float) */
- struct lp_build_context elem_bld;
-
- LLVMValueRef consts_ptr;
- const LLVMValueRef *pos;
- const LLVMValueRef (*inputs)[NUM_CHANNELS];
- LLVMValueRef (*outputs)[NUM_CHANNELS];
-
- const struct lp_build_sampler_soa *sampler;
-
- LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
- LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
- LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
- LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
-
- /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
- * set in the indirect_files field.
- * The temps[] array above is unused then.
- */
- LLVMValueRef temps_array;
-
- /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
- * set in the indirect_files field.
- * The outputs[] array above is unused then.
- */
- LLVMValueRef outputs_array;
-
- /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
- * set in the indirect_files field.
- * The inputs[] array above is unused then.
- */
- LLVMValueRef inputs_array;
-
- LLVMValueRef system_values_array;
-
- const struct tgsi_shader_info *info;
- /** bitmask indicating which register files are accessed indirectly */
- unsigned indirect_files;
-
- struct lp_build_mask_context *mask;
- struct lp_exec_mask exec_mask;
-
- struct tgsi_full_instruction *instructions;
- uint max_instructions;
-};
-
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
mask->bld = bld;
@@ -438,15 +348,15 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
* \param index which temporary register
* \param chan which channel of the temp register.
*/
-static LLVMValueRef
-get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
+LLVMValueRef
+lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
unsigned index,
unsigned chan)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
assert(chan < 4);
if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
- LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
+ LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
}
else {
@@ -460,15 +370,15 @@ get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
* \param index which output register
* \param chan which channel of the output register.
*/
-static LLVMValueRef
-get_output_ptr(struct lp_build_tgsi_soa_context *bld,
+LLVMValueRef
+lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
unsigned index,
unsigned chan)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
assert(chan < 4);
if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
- LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
+ LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
index * 4 + chan);
return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
}
@@ -487,15 +397,15 @@ build_gather(struct lp_build_tgsi_soa_context *bld,
LLVMValueRef base_ptr,
LLVMValueRef indexes)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
- LLVMValueRef res = bld->base.undef;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ LLVMValueRef res = bld->bld_base.base.undef;
unsigned i;
/*
* Loop over elements of index_vec, load scalar value, insert it into 'res'.
*/
- for (i = 0; i < bld->base.type.length; i++) {
- LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
+ for (i = 0; i < bld->bld_base.base.type.length; i++) {
+ LLVMValueRef ii = lp_build_const_int32(bld->bld_base.base.gallivm, i);
LLVMValueRef index = LLVMBuildExtractElement(builder,
indexes, ii, "");
LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
@@ -520,7 +430,7 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
struct lp_exec_mask *mask,
LLVMValueRef pred)
{
- struct gallivm_state *gallivm = bld->base.gallivm;
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
unsigned i;
@@ -537,7 +447,7 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
/*
* Loop over elements of index_vec, store scalar value.
*/
- for (i = 0; i < bld->base.type.length; i++) {
+ for (i = 0; i < bld->bld_base.base.type.length; i++) {
LLVMValueRef ii = lp_build_const_int32(gallivm, i);
LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
@@ -573,7 +483,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
unsigned reg_file, unsigned reg_index,
const struct tgsi_src_register *indirect_reg)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
struct lp_build_context *uint_bld = &bld->uint_bld;
/* always use X component of address register */
unsigned swizzle = indirect_reg->SwizzleX;
@@ -584,7 +494,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
assert(bld->indirect_files & (1 << reg_file));
- base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);
+ base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
assert(swizzle < 4);
rel = LLVMBuildLoad(builder,
@@ -598,9 +508,9 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
index = lp_build_add(uint_bld, base, rel);
- max_index = lp_build_const_int_vec(bld->base.gallivm,
+ max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
uint_bld->type,
- bld->info->file_max[reg_file]);
+ bld->bld_base.info->file_max[reg_file]);
assert(!uint_bld->type.sign);
index = lp_build_min(uint_bld, index, max_index);
@@ -608,176 +518,198 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
return index;
}
-
-/**
- * Register fetch.
- */
static LLVMValueRef
-emit_fetch(
- struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst,
- unsigned src_op,
- const unsigned chan_index )
+emit_fetch_constant(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
{
- struct gallivm_state *gallivm = bld->base.gallivm;
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *uint_bld = &bld->uint_bld;
- const struct tgsi_full_src_register *reg = &inst->Src[src_op];
- const unsigned swizzle =
- tgsi_util_get_full_src_register_swizzle(reg, chan_index);
- LLVMValueRef res;
LLVMValueRef indirect_index = NULL;
- if (swizzle > 3) {
- assert(0 && "invalid swizzle in emit_fetch()");
- return bld->base.undef;
- }
+ /* XXX: Handle fetching xyzw components as a vector */
+ assert(swizzle != ~0);
if (reg->Register.Indirect) {
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
&reg->Indirect);
- } else {
- assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
}
- switch (reg->Register.File) {
- case TGSI_FILE_CONSTANT:
- if (reg->Register.Indirect) {
- LLVMValueRef swizzle_vec =
- lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
- LLVMValueRef index_vec; /* index into the const buffer */
+ if (reg->Register.Indirect) {
+ LLVMValueRef swizzle_vec =
+ lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
+ LLVMValueRef index_vec; /* index into the const buffer */
- /* index_vec = indirect_index * 4 + swizzle */
- index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
- index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+ /* index_vec = indirect_index * 4 + swizzle */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
- /* Gather values from the constant buffer */
- res = build_gather(bld, bld->consts_ptr, index_vec);
- }
- else {
- LLVMValueRef index; /* index into the const buffer */
- LLVMValueRef scalar, scalar_ptr;
+ /* Gather values from the constant buffer */
+ return build_gather(bld, bld->consts_ptr, index_vec);
+ }
+ else {
+ LLVMValueRef index; /* index into the const buffer */
+ LLVMValueRef scalar, scalar_ptr;
- index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
+ index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
- scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
+ scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
&index, 1, "");
- scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
- res = lp_build_broadcast_scalar(&bld->base, scalar);
- }
- break;
+ return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
+ }
+}
- case TGSI_FILE_IMMEDIATE:
- res = bld->immediates[reg->Register.Index][swizzle];
- assert(res);
- break;
+static LLVMValueRef
+emit_fetch_immediate(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
+ assert(res);
+ return res;
+}
- case TGSI_FILE_INPUT:
- if (reg->Register.Indirect) {
- LLVMValueRef swizzle_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
- LLVMValueRef length_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
- LLVMValueRef index_vec; /* index into the const buffer */
- LLVMValueRef inputs_array;
- LLVMTypeRef float4_ptr_type;
+static LLVMValueRef
+emit_fetch_input(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &bld->uint_bld;
+ LLVMValueRef indirect_index = NULL;
+ LLVMValueRef res;
- /* index_vec = (indirect_index * 4 + swizzle) * length */
- index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
- index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
- index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+ if (reg->Register.Indirect) {
+ indirect_index = get_indirect_index(bld,
+ reg->Register.File,
+ reg->Register.Index,
+ &reg->Indirect);
+ }
- /* cast inputs_array pointer to float* */
- float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
- inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
+ if (reg->Register.Indirect) {
+ LLVMValueRef swizzle_vec =
+ lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
+ LLVMValueRef length_vec =
+ lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
+ LLVMValueRef index_vec; /* index into the const buffer */
+ LLVMValueRef inputs_array;
+ LLVMTypeRef float4_ptr_type;
+
+ /* index_vec = (indirect_index * 4 + swizzle) * length */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+ index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+
+ /* cast inputs_array pointer to float* */
+ float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+ inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
float4_ptr_type, "");
- /* Gather values from the temporary register array */
- res = build_gather(bld, inputs_array, index_vec);
- } else {
- if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
- LLVMValueRef lindex = lp_build_const_int32(gallivm,
- reg->Register.Index * 4 + swizzle);
- LLVMValueRef input_ptr = LLVMBuildGEP(builder,
- bld->inputs_array, &lindex, 1, "");
- res = LLVMBuildLoad(builder, input_ptr, "");
- }
- else {
- res = bld->inputs[reg->Register.Index][swizzle];
- }
- }
- assert(res);
- break;
-
- case TGSI_FILE_TEMPORARY:
- if (reg->Register.Indirect) {
- LLVMValueRef swizzle_vec =
- lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
- LLVMValueRef length_vec =
- lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
- bld->base.type.length);
- LLVMValueRef index_vec; /* index into the const buffer */
- LLVMValueRef temps_array;
- LLVMTypeRef float4_ptr_type;
-
- /* index_vec = (indirect_index * 4 + swizzle) * length */
- index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
- index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
- index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
-
- /* cast temps_array pointer to float* */
- float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
- temps_array = LLVMBuildBitCast(builder, bld->temps_array,
- float4_ptr_type, "");
-
- /* Gather values from the temporary register array */
- res = build_gather(bld, temps_array, index_vec);
+ /* Gather values from the temporary register array */
+ res = build_gather(bld, inputs_array, index_vec);
+ } else {
+ if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
+ LLVMValueRef lindex = lp_build_const_int32(gallivm,
+ reg->Register.Index * 4 + swizzle);
+ LLVMValueRef input_ptr = LLVMBuildGEP(builder,
+ bld->inputs_array, &lindex, 1, "");
+ res = LLVMBuildLoad(builder, input_ptr, "");
}
else {
- LLVMValueRef temp_ptr;
- temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
- res = LLVMBuildLoad(builder, temp_ptr, "");
- if (!res)
- return bld->base.undef;
+ res = bld->inputs[reg->Register.Index][swizzle];
}
- break;
-
- case TGSI_FILE_SYSTEM_VALUE:
- assert(!reg->Register.Indirect);
- {
- LLVMValueRef index; /* index into the system value array */
- LLVMValueRef scalar, scalar_ptr;
-
- index = lp_build_const_int32(gallivm,
- reg->Register.Index * 4 + swizzle);
-
- scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array,
- &index, 1, "");
- scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+ }
+ assert(res);
+ return res;
+}
- res = lp_build_broadcast_scalar(&bld->base, scalar);
- }
- break;
+static LLVMValueRef
+emit_fetch_temporary(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &bld->uint_bld;
+ LLVMValueRef indirect_index = NULL;
+ LLVMValueRef res;
- default:
- assert(0 && "invalid src register in emit_fetch()");
- return bld->base.undef;
+ if (reg->Register.Indirect) {
+ indirect_index = get_indirect_index(bld,
+ reg->Register.File,
+ reg->Register.Index,
+ &reg->Indirect);
}
- if (reg->Register.Absolute) {
- res = lp_build_abs( &bld->base, res );
+ if (reg->Register.Indirect) {
+ LLVMValueRef swizzle_vec =
+ lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
+ LLVMValueRef length_vec =
+ lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
+ bld->bld_base.base.type.length);
+ LLVMValueRef index_vec; /* index into the const buffer */
+ LLVMValueRef temps_array;
+ LLVMTypeRef float4_ptr_type;
+
+ /* index_vec = (indirect_index * 4 + swizzle) * length */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+ index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+
+ /* cast temps_array pointer to float* */
+ float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
+ temps_array = LLVMBuildBitCast(builder, bld->temps_array,
+ float4_ptr_type, "");
+
+ /* Gather values from the temporary register array */
+ res = build_gather(bld, temps_array, index_vec);
}
-
- if (reg->Register.Negate) {
- res = lp_build_negate( &bld->base, res );
+ else {
+ LLVMValueRef temp_ptr;
+ temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
+ res = LLVMBuildLoad(builder, temp_ptr, "");
+ if (!res)
+ return bld->bld_base.base.undef;
}
return res;
}
+static LLVMValueRef
+emit_fetch_system_value(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ const unsigned swizzle)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef index; /* index into the system value array */
+ LLVMValueRef scalar, scalar_ptr;
+
+ assert(!reg->Register.Indirect);
+
+ index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
+
+ scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, &index, 1, "");
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+
+ return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
+}
/**
* Register fetch with derivatives.
@@ -785,27 +717,21 @@ emit_fetch(
static void
emit_fetch_deriv(
struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst,
- unsigned index,
- const unsigned chan_index,
+ LLVMValueRef src,
LLVMValueRef *res,
LLVMValueRef *ddx,
LLVMValueRef *ddy)
{
- LLVMValueRef src;
-
- src = emit_fetch(bld, inst, index, chan_index);
-
if(res)
*res = src;
/* TODO: use interpolation coeffs for inputs */
if(ddx)
- *ddx = lp_build_ddx(&bld->base, src);
+ *ddx = lp_build_ddx(&bld->bld_base.base, src);
if(ddy)
- *ddy = lp_build_ddy(&bld->base, src);
+ *ddy = lp_build_ddy(&bld->bld_base.base, src);
}
@@ -818,7 +744,7 @@ emit_fetch_predicate(
const struct tgsi_full_instruction *inst,
LLVMValueRef *pred)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
unsigned index;
unsigned char swizzles[4];
LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
@@ -858,11 +784,11 @@ emit_fetch_predicate(
* is needlessly causing two comparisons due to storing the intermediate
* result as float vector instead of an integer mask vector.
*/
- value = lp_build_compare(bld->base.gallivm,
- bld->base.type,
+ value = lp_build_compare(bld->bld_base.base.gallivm,
+ bld->bld_base.base.type,
PIPE_FUNC_NOTEQUAL,
value,
- bld->base.zero);
+ bld->bld_base.base.zero);
if (inst->Predicate.Negate) {
value = LLVMBuildNot(builder, value, "");
}
@@ -881,15 +807,16 @@ emit_fetch_predicate(
* Register store.
*/
static void
-emit_store(
- struct lp_build_tgsi_soa_context *bld,
+emit_store_chan(
+ struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
unsigned index,
unsigned chan_index,
LLVMValueRef pred,
LLVMValueRef value)
{
- struct gallivm_state *gallivm = bld->base.gallivm;
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
struct lp_build_context *uint_bld = &bld->uint_bld;
@@ -900,13 +827,13 @@ emit_store(
break;
case TGSI_SAT_ZERO_ONE:
- value = lp_build_max(&bld->base, value, bld->base.zero);
- value = lp_build_min(&bld->base, value, bld->base.one);
+ value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
+ value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
break;
case TGSI_SAT_MINUS_PLUS_ONE:
- value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
- value = lp_build_min(&bld->base, value, bld->base.one);
+ value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
+ value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
break;
default:
@@ -919,7 +846,8 @@ emit_store(
reg->Register.Index,
&reg->Indirect);
} else {
- assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
+ assert(reg->Register.Index <=
+ bld->bld_base.info->file_max[reg->Register.File]);
}
switch( reg->Register.File ) {
@@ -928,7 +856,7 @@ emit_store(
LLVMValueRef chan_vec =
lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
LLVMValueRef length_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
+ lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
LLVMValueRef index_vec; /* indexes into the temp registers */
LLVMValueRef outputs_array;
LLVMValueRef pixel_offsets;
@@ -937,7 +865,7 @@ emit_store(
/* build pixel offset vector: {0, 1, 2, 3, ...} */
pixel_offsets = uint_bld->undef;
- for (i = 0; i < bld->base.type.length; i++) {
+ for (i = 0; i < bld->bld_base.base.type.length; i++) {
LLVMValueRef ii = lp_build_const_int32(gallivm, i);
pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
ii, ii, "");
@@ -959,7 +887,7 @@ emit_store(
&bld->exec_mask, pred);
}
else {
- LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
+ LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
chan_index);
lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
}
@@ -971,7 +899,7 @@ emit_store(
lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
LLVMValueRef length_vec =
lp_build_const_int_vec(gallivm, uint_bld->type,
- bld->base.type.length);
+ bld->bld_base.base.type.length);
LLVMValueRef index_vec; /* indexes into the temp registers */
LLVMValueRef temps_array;
LLVMValueRef pixel_offsets;
@@ -980,7 +908,7 @@ emit_store(
/* build pixel offset vector: {0, 1, 2, 3, ...} */
pixel_offsets = uint_bld->undef;
- for (i = 0; i < bld->base.type.length; i++) {
+ for (i = 0; i < bld->bld_base.base.type.length; i++) {
LLVMValueRef ii = lp_build_const_int32(gallivm, i);
pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
ii, ii, "");
@@ -1002,7 +930,7 @@ emit_store(
&bld->exec_mask, pred);
}
else {
- LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+ LLVMValueRef temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
chan_index);
lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
}
@@ -1023,6 +951,27 @@ emit_store(
}
}
+static void
+emit_store(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction * inst,
+ const struct tgsi_opcode_info * info,
+ LLVMValueRef dst[4])
+
+{
+ unsigned chan_index;
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ if(info->num_dst) {
+ LLVMValueRef pred[TGSI_NUM_CHANNELS];
+
+ emit_fetch_predicate( bld, inst, pred );
+
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
+ }
+ }
+}
/**
* High-level instruction translators.
@@ -1034,7 +983,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
enum lp_build_tex_modifier modifier,
LLVMValueRef *texel)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
unsigned unit;
LLVMValueRef lod_bias, explicit_lod;
LLVMValueRef oow = NULL;
@@ -1047,7 +996,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
if (!bld->sampler) {
_debug_printf("warning: found texture instruction but no sampler generator supplied\n");
for (i = 0; i < 4; i++) {
- texel[i] = bld->base.undef;
+ texel[i] = bld->bld_base.base.undef;
}
return;
}
@@ -1079,12 +1028,12 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
}
if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
- lod_bias = emit_fetch( bld, inst, 0, 3 );
+ lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
explicit_lod = NULL;
}
else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
lod_bias = NULL;
- explicit_lod = emit_fetch( bld, inst, 0, 3 );
+ explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
}
else {
lod_bias = NULL;
@@ -1092,43 +1041,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
}
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
- oow = emit_fetch( bld, inst, 0, 3 );
- oow = lp_build_rcp(&bld->base, oow);
+ oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
+ oow = lp_build_rcp(&bld->bld_base.base, oow);
}
for (i = 0; i < num_coords; i++) {
- coords[i] = emit_fetch( bld, inst, 0, i );
+ coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
- coords[i] = lp_build_mul(&bld->base, coords[i], oow);
+ coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
}
for (i = num_coords; i < 3; i++) {
- coords[i] = bld->base.undef;
+ coords[i] = bld->bld_base.base.undef;
}
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
- LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0);
+ LLVMValueRef index0 = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
for (i = 0; i < num_coords; i++) {
- LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
- LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
+ LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base, inst, 1, i );
+ LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base, inst, 2, i );
ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
}
unit = inst->Src[3].Register.Index;
} else {
for (i = 0; i < num_coords; i++) {
- ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
- ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
+ ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base, coords[i] );
+ ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base, coords[i] );
}
unit = inst->Src[1].Register.Index;
}
for (i = num_coords; i < 3; i++) {
- ddx[i] = LLVMGetUndef(bld->base.elem_type);
- ddy[i] = LLVMGetUndef(bld->base.elem_type);
+ ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
+ ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
}
bld->sampler->emit_fetch_texel(bld->sampler,
- bld->base.gallivm,
- bld->base.type,
+ bld->bld_base.base.gallivm,
+ bld->bld_base.base.type,
unit, num_coords, coords,
ddx, ddy,
lod_bias, explicit_lod,
@@ -1144,10 +1093,10 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
for (i = 0; i < 5; i++) {
unsigned opcode;
- if (pc + i >= bld->info->num_instructions)
+ if (pc + i >= bld->bld_base.info->num_instructions)
return TRUE;
- opcode = bld->instructions[pc + i].Instruction.Opcode;
+ opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
if (opcode == TGSI_OPCODE_END)
return TRUE;
@@ -1182,9 +1131,9 @@ emit_kil(
const struct tgsi_full_instruction *inst,
int pc)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
const struct tgsi_full_src_register *reg = &inst->Src[0];
- LLVMValueRef terms[NUM_CHANNELS];
+ LLVMValueRef terms[TGSI_NUM_CHANNELS];
LLVMValueRef mask;
unsigned chan_index;
@@ -1197,10 +1146,10 @@ emit_kil(
swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
/* Check if the component has not been already tested. */
- assert(swizzle < NUM_CHANNELS);
+ assert(swizzle < TGSI_NUM_CHANNELS);
if( !terms[swizzle] )
/* TODO: change the comparison operator instead of setting the sign */
- terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
+ terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
}
mask = NULL;
@@ -1211,7 +1160,7 @@ emit_kil(
/*
* If term < 0 then mask = 0 else mask = ~0.
*/
- chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
+ chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
if(mask)
mask = LLVMBuildAnd(builder, mask, chan_mask, "");
@@ -1237,10 +1186,9 @@ emit_kil(
*/
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst,
- int pc)
+ int pc)
{
- LLVMBuilderRef builder = bld->base.gallivm->builder;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
LLVMValueRef mask;
/* For those channels which are "alive", disable fragment shader
@@ -1250,7 +1198,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
}
else {
- LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
+ LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
mask = zero;
}
@@ -1268,7 +1216,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
static void
emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
{
- struct gallivm_state *gallivm = bld->base.gallivm;
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef temp_ptr;
LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
@@ -1276,7 +1224,7 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
int index;
- int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
+ int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
for (index = 0; index < n; index++) {
LLVMValueRef idx = lp_build_const_int32(gallivm, index);
@@ -1286,7 +1234,7 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
for (chan = 0; chan < 4; chan++) {
- temp_ptr = get_temp_ptr(bld, index, chan);
+ temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
res = LLVMBuildLoad(builder, temp_ptr, "");
v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
@@ -1307,31 +1255,32 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
-static void
-emit_declaration(
- struct lp_build_tgsi_soa_context *bld,
+void
+lp_emit_declaration_soa(
+ struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_declaration *decl)
{
- struct gallivm_state *gallivm = bld->base.gallivm;
- LLVMTypeRef vec_type = bld->base.vec_type;
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
const unsigned first = decl->Range.First;
const unsigned last = decl->Range.Last;
unsigned idx, i;
for (idx = first; idx <= last; ++idx) {
- assert(last <= bld->info->file_max[decl->Declaration.File]);
+ assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
assert(idx < LP_MAX_TGSI_TEMPS);
if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
- for (i = 0; i < NUM_CHANNELS; i++)
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++)
bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
}
break;
case TGSI_FILE_OUTPUT:
if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
- for (i = 0; i < NUM_CHANNELS; i++)
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++)
bld->outputs[idx][i] = lp_build_alloca(gallivm,
vec_type, "output");
}
@@ -1339,13 +1288,13 @@ emit_declaration(
case TGSI_FILE_ADDRESS:
assert(idx < LP_MAX_TGSI_ADDRS);
- for (i = 0; i < NUM_CHANNELS; i++)
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++)
bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr");
break;
case TGSI_FILE_PREDICATE:
assert(idx < LP_MAX_TGSI_PREDS);
- for (i = 0; i < NUM_CHANNELS; i++)
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++)
bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
"predicate");
break;
@@ -1358,965 +1307,427 @@ emit_declaration(
}
-/**
- * Emit LLVM for one TGSI instruction.
- * \param return TRUE for success, FALSE otherwise
- */
-static boolean
-emit_instruction(
- struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst,
- const struct tgsi_opcode_info *info,
- int *pc)
+void lp_emit_immediate_soa( /* Store one TGSI immediate in the next bld->immediates[] slot and bump num_immediates. */
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_immediate *imm)
{
- unsigned chan_index;
- LLVMValueRef src0, src1, src2;
- LLVMValueRef tmp0, tmp1, tmp2;
- LLVMValueRef tmp3 = NULL;
- LLVMValueRef tmp4 = NULL;
- LLVMValueRef tmp5 = NULL;
- LLVMValueRef tmp6 = NULL;
- LLVMValueRef tmp7 = NULL;
- LLVMValueRef res;
- LLVMValueRef dst0[NUM_CHANNELS];
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
- /*
- * Stores and write masks are handled in a general fashion after the long
- * instruction opcode switch statement.
- *
- * Although not stricitly necessary, we avoid generating instructions for
- * channels which won't be stored, in cases where's that easy. For some
- * complex instructions, like texture sampling, it is more convenient to
- * assume a full writemask and then let LLVM optimization passes eliminate
- * redundant code.
- */
+ /* simply copy the immediate values into the next immediates[] slot */
+ unsigned i;
+ const uint size = imm->Immediate.NrTokens - 1; /* value count; the one token subtracted is presumably the immediate's header token — confirm */
+ assert(size <= 4);
+ assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES); /* debug-only bound on the slot index used below */
- (*pc)++;
+ for( i = 0; i < size; ++i )
+ bld->immediates[bld->num_immediates][i] =
+ lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); /* constant of the builder's type made from the float value */
- assert(info->num_dst <= 1);
- if (info->num_dst) {
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = bld->base.undef;
- }
- }
+ for( i = size; i < 4; ++i )
+ bld->immediates[bld->num_immediates][i] = bld_base->base.undef; /* components not supplied by the immediate are set to undef */
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ARL:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- tmp0 = lp_build_floor(&bld->base, tmp0);
- dst0[chan_index] = tmp0;
- }
- break;
+ bld->num_immediates++; /* the next immediate goes into the following slot */
+}
- case TGSI_OPCODE_MOV:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
- }
- break;
+static void /* Opcode action: calls emit_fetch_deriv on args[0] with output[chan] as the only result pointer; action is unused. */
+ddx_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_LIT:
- if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ) {
- dst0[TGSI_CHAN_X] = bld->base.one;
- }
- if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
- src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- dst0[TGSI_CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
- }
- if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
- /* XMM[1] = SrcReg[0].yyyy */
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
- /* XMM[1] = max(XMM[1], 0) */
- tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
- /* XMM[2] = SrcReg[0].wwww */
- tmp2 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
- tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
- dst0[TGSI_CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
- }
- if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) ) {
- dst0[TGSI_CHAN_W] = bld->base.one;
- }
- break;
+ emit_fetch_deriv(bld, emit_data->args[0], NULL,
+ &emit_data->output[emit_data->chan], NULL); /* only the 4th argument is non-NULL — presumably the d/dx result slot; confirm against emit_fetch_deriv */
+}
- case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
- src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- res = lp_build_rcp(&bld->base, src0);
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = res;
- }
- break;
+static void /* Opcode action: calls emit_fetch_deriv on args[0] with output[chan] as the only result pointer; action is unused. */
+ddy_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
- src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- src0 = lp_build_abs(&bld->base, src0);
- res = lp_build_rsqrt(&bld->base, src0);
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = res;
- }
- break;
+ emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
+ &emit_data->output[emit_data->chan]); /* only the 5th argument is non-NULL — presumably the d/dy result slot; confirm against emit_fetch_deriv */
+}
- case TGSI_OPCODE_EXP:
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
- TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
- TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
- LLVMValueRef *p_exp2_int_part = NULL;
- LLVMValueRef *p_frac_part = NULL;
- LLVMValueRef *p_exp2 = NULL;
-
- src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
- p_exp2_int_part = &tmp0;
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
- p_frac_part = &tmp1;
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
- p_exp2 = &tmp2;
-
- lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
-
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
- dst0[TGSI_CHAN_X] = tmp0;
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
- dst0[TGSI_CHAN_Y] = tmp1;
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
- dst0[TGSI_CHAN_Z] = tmp2;
- }
- /* dst.w = 1.0 */
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
- dst0[TGSI_CHAN_W] = bld->base.one;
- }
- break;
+static void /* Opcode action: forwards to emit_kilp; action and emit_data are unused. */
+kilp_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_LOG:
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
- TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
- TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
- LLVMValueRef *p_floor_log2 = NULL;
- LLVMValueRef *p_exp = NULL;
- LLVMValueRef *p_log2 = NULL;
-
- src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- src0 = lp_build_abs( &bld->base, src0 );
-
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
- p_floor_log2 = &tmp0;
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
- p_exp = &tmp1;
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
- p_log2 = &tmp2;
-
- lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
-
- /* dst.x = floor(lg2(abs(src.x))) */
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
- dst0[TGSI_CHAN_X] = tmp0;
- /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) {
- dst0[TGSI_CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
- }
- /* dst.z = lg2(abs(src.x)) */
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
- dst0[TGSI_CHAN_Z] = tmp2;
- }
- /* dst.w = 1.0 */
- if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
- dst0[TGSI_CHAN_W] = bld->base.one;
- }
- break;
+ emit_kilp(bld, bld_base->pc - 1); /* passes pc - 1; presumably pc already points past this instruction — TODO confirm in the caller */
+}
- case TGSI_OPCODE_MUL:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
- }
- break;
+static void /* Opcode action: forwards the instruction to emit_kil; action is unused. */
+kil_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_ADD:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
- }
- break;
+ emit_kil(bld, emit_data->inst, bld_base->pc - 1); /* passes pc - 1; presumably pc already points past this instruction — TODO confirm in the caller */
+}
- case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0;
- }
- break;
+static void /* Opcode action: calls emit_tex with LP_BLD_TEX_MODIFIER_NONE; action and bld_base are used only to reach the SoA context. */
+tex_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0;
- }
- break;
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output); /* emit_data->output is handed over as the result array */
+}
- case TGSI_OPCODE_DST:
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
- dst0[TGSI_CHAN_X] = bld->base.one;
- }
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
- tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
- dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
- }
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
- dst0[TGSI_CHAN_Z] = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
- }
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
- dst0[TGSI_CHAN_W] = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
- }
- break;
+static void /* Opcode action: calls emit_tex with LP_BLD_TEX_MODIFIER_LOD_BIAS; action is unused. */
+txb_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_MIN:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
- }
- break;
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
+ emit_data->output); /* emit_data->output is handed over as the result array */
+}
- case TGSI_OPCODE_MAX:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
- }
- break;
+static void /* Opcode action: calls emit_tex with LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV; action is unused. */
+txd_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
- dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- }
- break;
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
+ emit_data->output); /* emit_data->output is handed over as the result array */
+}
- case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
- dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- }
- break;
+static void /* Opcode action: calls emit_tex with LP_BLD_TEX_MODIFIER_EXPLICIT_LOD; action is unused. */
+txl_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- tmp1 = emit_fetch( bld, inst, 1, chan_index );
- tmp2 = emit_fetch( bld, inst, 2, chan_index );
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
- dst0[chan_index] = tmp0;
- }
- break;
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
+ emit_data->output); /* emit_data->output is handed over as the result array */
+}
- case TGSI_OPCODE_SUB:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- tmp1 = emit_fetch( bld, inst, 1, chan_index );
- dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
- }
- break;
+static void /* Opcode action: calls emit_tex with LP_BLD_TEX_MODIFIER_PROJECTED; action is unused. */
+txp_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_LRP:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- src2 = emit_fetch( bld, inst, 2, chan_index );
- tmp0 = lp_build_sub( &bld->base, src1, src2 );
- tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
- dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
- }
- break;
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
+ emit_data->output); /* emit_data->output is handed over as the result array */
+}
- case TGSI_OPCODE_CND:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- src2 = emit_fetch( bld, inst, 2, chan_index );
- tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
- tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
- dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
- }
- break;
-
- case TGSI_OPCODE_DP2A:
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 = src[0].x */
- tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 = src[1].x */
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 = src[0].y */
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 = src[1].y */
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
- tmp1 = emit_fetch( bld, inst, 2, TGSI_CHAN_X ); /* xmm1 = src[2].x */
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
- }
- break;
+static void /* Opcode action: calls lp_exec_mask_call with the instruction's Label.Label and a pointer to pc; action is unused. */
+cal_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_FRC:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- tmp0 = lp_build_floor(&bld->base, src0);
- tmp0 = lp_build_sub(&bld->base, src0, tmp0);
- dst0[chan_index] = tmp0;
- }
- break;
+ lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
+ &bld_base->pc); /* pc is passed by address — presumably so the callee can redirect it to the label; confirm */
+}
- case TGSI_OPCODE_CLAMP:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- src2 = emit_fetch( bld, inst, 2, chan_index );
- tmp0 = lp_build_max(&bld->base, tmp0, src1);
- tmp0 = lp_build_min(&bld->base, tmp0, src2);
- dst0[chan_index] = tmp0;
- }
- break;
+static void /* Opcode action: calls lp_exec_mask_ret on the SoA exec mask; action and emit_data are unused. */
+ret_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_FLR:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
- }
- break;
+ lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc); /* pc is passed by address — presumably so the callee can update it; confirm */
+}
- case TGSI_OPCODE_ROUND:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- dst0[chan_index] = lp_build_round(&bld->base, tmp0);
- }
- break;
+static void /* Opcode action: calls lp_exec_break on the SoA exec mask; action and emit_data are unused. */
+brk_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_EX2: {
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- tmp0 = lp_build_exp2( &bld->base, tmp0);
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0;
- }
- break;
- }
+ lp_exec_break(&bld->exec_mask);
+}
- case TGSI_OPCODE_LG2:
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- tmp0 = lp_build_log2( &bld->base, tmp0);
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0;
- }
- break;
+static void /* Opcode action: compares args[0] against zero (NOTEQUAL) and hands the result to lp_exec_mask_cond_push; action is unused. */
+if_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef tmp;
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_POW:
- src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- src1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
- res = lp_build_pow( &bld->base, src0, src1 );
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = res;
- }
- break;
+ tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
+ emit_data->args[0], bld->bld_base.base.zero); /* tmp = (args[0] != 0) */
+ lp_exec_mask_cond_push(&bld->exec_mask, tmp); /* the comparison result becomes the pushed condition */
+}
- case TGSI_OPCODE_XPD:
- if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
- TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
- tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
- tmp3 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
- }
- if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
- TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
- tmp4 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
- }
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
- tmp2 = tmp0;
- tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
- tmp5 = tmp3;
- tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
- tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
- dst0[TGSI_CHAN_X] = tmp2;
- }
- if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
- TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
- tmp5 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- }
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
- tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
- tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
- dst0[TGSI_CHAN_Y] = tmp3;
- }
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
- tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
- tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
- dst0[TGSI_CHAN_Z] = tmp5;
- }
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
- dst0[TGSI_CHAN_W] = bld->base.one;
- }
- break;
+static void /* Opcode action: calls lp_exec_bgnloop on the SoA exec mask; action and emit_data are unused. */
+bgnloop_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_ABS:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
- }
- break;
+ lp_exec_bgnloop(&bld->exec_mask);
+}
- case TGSI_OPCODE_RCC:
- /* deprecated? */
- assert(0);
- return FALSE;
-
- case TGSI_OPCODE_DPH:
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0;
- }
- break;
+static void /* Opcode action: calls lp_exec_mask_bgnsub on the SoA exec mask; action and emit_data are unused. */
+bgnsub_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_COS:
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- tmp0 = lp_build_cos( &bld->base, tmp0 );
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0;
- }
- break;
+ lp_exec_mask_bgnsub(&bld->exec_mask);
+}
- case TGSI_OPCODE_DDX:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
- }
- break;
+static void /* Opcode action: calls lp_exec_mask_cond_invert on the SoA exec mask; action and emit_data are unused. */
+else_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_DDY:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
- }
- break;
+ lp_exec_mask_cond_invert(&bld->exec_mask);
+}
- case TGSI_OPCODE_KILP:
- /* predicated kill */
- emit_kilp( bld, inst, (*pc)-1 );
- break;
+static void /* Opcode action: calls lp_exec_mask_cond_pop on the SoA exec mask; action and emit_data are unused. */
+endif_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_KIL:
- /* conditional kill */
- emit_kil( bld, inst, (*pc)-1 );
- break;
+ lp_exec_mask_cond_pop(&bld->exec_mask);
+}
- case TGSI_OPCODE_PK2H:
- return FALSE;
- break;
+static void /* Opcode action: calls lp_exec_endloop with the builder's gallivm state and the SoA exec mask; action and emit_data are unused. */
+endloop_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_PK2US:
- return FALSE;
- break;
+ lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
+}
- case TGSI_OPCODE_PK4B:
- return FALSE;
- break;
+static void /* Opcode action: calls lp_exec_mask_endsub on the SoA exec mask; action and emit_data are unused. */
+endsub_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_PK4UB:
- return FALSE;
- break;
+ lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc); /* pc is passed by address — presumably so the callee can update it; confirm */
+}
- case TGSI_OPCODE_RFL:
- return FALSE;
- break;
+static void /* Opcode action: calls lp_exec_continue on the SoA exec mask; action and emit_data are unused. */
+cont_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); /* SoA context obtained from the generic one */
- case TGSI_OPCODE_SEQ:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
- dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- }
- break;
+ lp_exec_continue(&bld->exec_mask);
+}
- case TGSI_OPCODE_SFL:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = bld->base.zero;
- }
- break;
+/* XXX: Refactor and move it to lp_bld_action.c
+ *
+ * XXX: What do the comments about xmm registers mean? Maybe they are left over
+ * from old code, but there is no guarantee that LLVM will use those registers
+ * for this code.
+ *
+ * XXX: There should be no calls to lp_build_emit_fetch in this function. This
+ * should be handled by the emit_data->fetch_args function. */
+static void
+nrm_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef tmp0, tmp1;
+ LLVMValueRef tmp4 = NULL;
+ LLVMValueRef tmp5 = NULL;
+ LLVMValueRef tmp6 = NULL;
+ LLVMValueRef tmp7 = NULL;
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- case TGSI_OPCODE_SGT:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
- dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- }
- break;
+ uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
- case TGSI_OPCODE_SIN:
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- tmp0 = lp_build_sin( &bld->base, tmp0 );
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0;
- }
- break;
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
+ TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
+ TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
+ (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
- case TGSI_OPCODE_SLE:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
- dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- }
- break;
+ /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
- case TGSI_OPCODE_SNE:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
- dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ /* xmm4 = src.x */
+ /* xmm0 = src.x * src.x */
+ tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
+ tmp4 = tmp0;
}
- break;
+ tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
- case TGSI_OPCODE_STR:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = bld->base.one;
+ /* xmm5 = src.y */
+ /* xmm0 = xmm0 + src.y * src.y */
+ tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
+ tmp5 = tmp1;
}
- break;
-
- case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
- break;
-
- case TGSI_OPCODE_TXD:
- emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
- break;
-
- case TGSI_OPCODE_UP2H:
- /* deprecated */
- assert (0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_UP2US:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_UP4B:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_UP4UB:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_X2D:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_ARA:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
+ tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
- case TGSI_OPCODE_ARR:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- tmp0 = lp_build_round(&bld->base, tmp0);
- dst0[chan_index] = tmp0;
+ /* xmm6 = src.z */
+ /* xmm0 = xmm0 + src.z * src.z */
+ tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
+ tmp6 = tmp1;
}
- break;
-
- case TGSI_OPCODE_BRA:
- /* deprecated */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_CAL:
- lp_exec_mask_call(&bld->exec_mask,
- inst->Label.Label,
- pc);
+ tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
- break;
-
- case TGSI_OPCODE_RET:
- lp_exec_mask_ret(&bld->exec_mask, pc);
- break;
-
- case TGSI_OPCODE_END:
- if (0) {
- /* for debugging */
- emit_dump_temps(bld);
+ if (dims == 4) {
+ /* xmm7 = src.w */
+ /* xmm0 = xmm0 + src.w * src.w */
+ tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
+ tmp7 = tmp1;
+ }
+ tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
}
- *pc = -1;
- break;
-
- case TGSI_OPCODE_SSG:
- /* TGSI_OPCODE_SGN */
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
+ /* xmm1 = 1 / sqrt(xmm0) */
+ tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
+ /* dst.x = xmm1 * src.x */
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
+ emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
}
- break;
-
- case TGSI_OPCODE_CMP:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- src0 = emit_fetch( bld, inst, 0, chan_index );
- src1 = emit_fetch( bld, inst, 1, chan_index );
- src2 = emit_fetch( bld, inst, 2, chan_index );
- tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
- dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
+ /* dst.y = xmm1 * src.y */
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
+ emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
}
- break;
- case TGSI_OPCODE_SCS:
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- dst0[TGSI_CHAN_X] = lp_build_cos( &bld->base, tmp0 );
- }
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
- dst0[TGSI_CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
+ /* dst.z = xmm1 * src.z */
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
+ emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
}
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
- dst0[TGSI_CHAN_Z] = bld->base.zero;
+ /* dst.w = xmm1 * src.w */
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
+ emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
}
- TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
- dst0[TGSI_CHAN_W] = bld->base.one;
- }
- break;
-
- case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
- break;
-
- case TGSI_OPCODE_NRM:
- /* fall-through */
- case TGSI_OPCODE_NRM4:
- /* 3 or 4-component normalization */
- {
- uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
-
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) ||
- TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) ||
- TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) ||
- (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 4)) {
-
- /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
-
- /* xmm4 = src.x */
- /* xmm0 = src.x * src.x */
- tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
- tmp4 = tmp0;
- }
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
-
- /* xmm5 = src.y */
- /* xmm0 = xmm0 + src.y * src.y */
- tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Y);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
- tmp5 = tmp1;
- }
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-
- /* xmm6 = src.z */
- /* xmm0 = xmm0 + src.z * src.z */
- tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Z);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
- tmp6 = tmp1;
- }
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-
- if (dims == 4) {
- /* xmm7 = src.w */
- /* xmm0 = xmm0 + src.w * src.w */
- tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_W);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) {
- tmp7 = tmp1;
- }
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
- }
-
- /* xmm1 = 1 / sqrt(xmm0) */
- tmp1 = lp_build_rsqrt( &bld->base, tmp0);
-
- /* dst.x = xmm1 * src.x */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
- dst0[TGSI_CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
- }
-
- /* dst.y = xmm1 * src.y */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
- dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
- }
-
- /* dst.z = xmm1 * src.z */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
- dst0[TGSI_CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
- }
-
- /* dst.w = xmm1 * src.w */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) && dims == 4) {
- dst0[TGSI_CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
- }
- }
-
- /* dst.w = 1.0 */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 3) {
- dst0[TGSI_CHAN_W] = bld->base.one;
- }
- }
- break;
-
- case TGSI_OPCODE_DIV:
- /* deprecated */
- assert( 0 );
- return FALSE;
- break;
-
- case TGSI_OPCODE_DP2:
- tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 = src[0].x */
- tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 = src[1].x */
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
- tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 = src[0].y */
- tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 = src[1].y */
- tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
- tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
- }
- break;
-
- case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
- break;
-
- case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
- break;
-
- case TGSI_OPCODE_BRK:
- lp_exec_break(&bld->exec_mask);
- break;
-
- case TGSI_OPCODE_IF:
- tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
- tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
- tmp0, bld->base.zero);
- lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
- break;
-
- case TGSI_OPCODE_BGNLOOP:
- lp_exec_bgnloop(&bld->exec_mask);
- break;
-
- case TGSI_OPCODE_BGNSUB:
- lp_exec_mask_bgnsub(&bld->exec_mask);
- break;
-
- case TGSI_OPCODE_ELSE:
- lp_exec_mask_cond_invert(&bld->exec_mask);
- break;
-
- case TGSI_OPCODE_ENDIF:
- lp_exec_mask_cond_pop(&bld->exec_mask);
- break;
-
- case TGSI_OPCODE_ENDLOOP:
- lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
- break;
+ }
- case TGSI_OPCODE_ENDSUB:
- lp_exec_mask_endsub(&bld->exec_mask, pc);
- break;
+ /* dst.w = 1.0 */
+ if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
+ emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
+ }
+}
- case TGSI_OPCODE_PUSHA:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
+static void emit_prologue(struct lp_build_tgsi_context * bld_base)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
- case TGSI_OPCODE_POPA:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
+ if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ LLVMValueRef array_size =
+ lp_build_const_int32(gallivm,
+ bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); /* i.e. (file_max + 1) * 4 elements */
+ bld->temps_array = lp_build_array_alloca(gallivm,
+ bld_base->base.vec_type, array_size,
+ "temp_array");
+ }
- case TGSI_OPCODE_CEIL:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
- }
- break;
+ if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+ LLVMValueRef array_size =
+ lp_build_const_int32(gallivm,
+ bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
+ bld->outputs_array = lp_build_array_alloca(gallivm,
+ bld_base->base.vec_type, array_size,
+ "output_array");
+ }
- case TGSI_OPCODE_I2F:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
+ /* If the shader uses indirect addressing on inputs, copy every input
+ * channel into the "input_array" alloca so the inputs can be indexed
+ * dynamically.  Element (index, chan) is stored at slot
+ * index * 4 + chan; channels whose value is NULL are skipped. */
+ if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
+ unsigned index, chan;
+ LLVMTypeRef vec_type = bld_base->base.vec_type;
+ LLVMValueRef array_size = lp_build_const_int32(gallivm,
+ bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
+ bld->inputs_array = lp_build_array_alloca(gallivm,
+ vec_type, array_size,
+ "input_array");
- case TGSI_OPCODE_NOT:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
+ assert(bld_base->info->num_inputs
+ <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
- case TGSI_OPCODE_TRUNC:
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
+ for (index = 0; index < bld_base->info->num_inputs; ++index) {
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ LLVMValueRef lindex =
+ lp_build_const_int32(gallivm, index * 4 + chan);
+ LLVMValueRef input_ptr =
+ LLVMBuildGEP(gallivm->builder, bld->inputs_array,
+ &lindex, 1, "");
+ LLVMValueRef value = bld->inputs[index][chan];
+ if (value)
+ LLVMBuildStore(gallivm->builder, value, input_ptr);
+ }
}
- break;
-
- case TGSI_OPCODE_SHL:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_ISHR:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_AND:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_OR:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_MOD:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_XOR:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_SAD:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_TXF:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_TXQ:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_CONT:
- lp_exec_continue(&bld->exec_mask);
- break;
-
- case TGSI_OPCODE_EMIT:
- return FALSE;
- break;
-
- case TGSI_OPCODE_ENDPRIM:
- return FALSE;
- break;
+ }
+}
- case TGSI_OPCODE_NOP:
- break;
+static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- default:
- return FALSE;
+ if (0) {
+ /* for debugging: change the condition to 1 to call emit_dump_temps() */
+ emit_dump_temps(bld);
}
-
- if(info->num_dst) {
- LLVMValueRef pred[NUM_CHANNELS];
- emit_fetch_predicate( bld, inst, pred );
-
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
+ /* If the shader uses indirect addressing on outputs, hand our alloca
+ * array back through the output slots specified by the caller: each
+ * bld->outputs[index][chan] is set to whatever lp_get_output_ptr()
+ * returns for that (index, chan) pair (presumably a pointer into
+ * outputs_array -- confirm against lp_get_output_ptr()). */
+ if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+ unsigned index, chan;
+ assert(bld_base->info->num_outputs <=
+ bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
+ for (index = 0; index < bld_base->info->num_outputs; ++index) {
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
+ }
}
}
-
- return TRUE;
}
-
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
const struct tgsi_token *tokens,
@@ -2325,17 +1736,12 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
LLVMValueRef consts_ptr,
LLVMValueRef system_values_array,
const LLVMValueRef *pos,
- const LLVMValueRef (*inputs)[NUM_CHANNELS],
- LLVMValueRef (*outputs)[NUM_CHANNELS],
+ const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
+ LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
struct lp_build_sampler_soa *sampler,
const struct tgsi_shader_info *info)
{
struct lp_build_tgsi_soa_context bld;
- struct tgsi_parse_context parse;
- uint num_immediates = 0;
- uint num_instructions = 0;
- unsigned i;
- int pc = 0;
struct lp_type res_type;
@@ -2347,7 +1753,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
/* Setup build context */
memset(&bld, 0, sizeof bld);
- lp_build_context_init(&bld.base, gallivm, type);
+ lp_build_context_init(&bld.bld_base.base, gallivm, type);
lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
bld.mask = mask;
@@ -2356,145 +1762,55 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
bld.sampler = sampler;
- bld.info = info;
+ bld.bld_base.info = info;
bld.indirect_files = info->indirect_files;
- bld.instructions = (struct tgsi_full_instruction *)
- MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
- bld.max_instructions = LP_MAX_INSTRUCTIONS;
- if (!bld.instructions) {
- return;
- }
+ bld.bld_base.soa = TRUE;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
+ bld.bld_base.emit_store = emit_store;
+
+ bld.bld_base.emit_declaration = lp_emit_declaration_soa;
+ bld.bld_base.emit_immediate = lp_emit_immediate_soa;
+
+ bld.bld_base.emit_prologue = emit_prologue;
+ bld.bld_base.emit_epilogue = emit_epilogue;
+
+ /* Set opcode actions */
+ lp_set_default_actions_cpu(&bld.bld_base);
+
+ bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
+
+ lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
- lp_exec_mask_init(&bld.exec_mask, &bld.base);
-
- if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
- LLVMValueRef array_size =
- lp_build_const_int32(gallivm,
- info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
- bld.temps_array = lp_build_array_alloca(gallivm,
- bld.base.vec_type, array_size,
- "temp_array");
- }
-
- if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
- LLVMValueRef array_size =
- lp_build_const_int32(gallivm,
- info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
- bld.outputs_array = lp_build_array_alloca(gallivm,
- bld.base.vec_type, array_size,
- "output_array");
- }
-
- /* If we have indirect addressing in inputs we need to copy them into
- * our alloca array to be able to iterate over them */
- if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
- unsigned index, chan;
- LLVMTypeRef vec_type = bld.base.vec_type;
- LLVMValueRef array_size =
- lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
- bld.inputs_array = lp_build_array_alloca(gallivm,
- vec_type, array_size,
- "input_array");
-
- assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
-
- for (index = 0; index < info->num_inputs; ++index) {
- for (chan = 0; chan < NUM_CHANNELS; ++chan) {
- LLVMValueRef lindex =
- lp_build_const_int32(gallivm, index * 4 + chan);
- LLVMValueRef input_ptr =
- LLVMBuildGEP(gallivm->builder, bld.inputs_array,
- &lindex, 1, "");
- LLVMValueRef value = bld.inputs[index][chan];
- if (value)
- LLVMBuildStore(gallivm->builder, value, input_ptr);
- }
- }
- }
bld.system_values_array = system_values_array;
- tgsi_parse_init( &parse, tokens );
-
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- /* Inputs already interpolated */
- emit_declaration( &bld, &parse.FullToken.FullDeclaration );
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- {
- /* save expanded instruction */
- if (num_instructions == bld.max_instructions) {
- struct tgsi_full_instruction *instructions;
- instructions = REALLOC(bld.instructions,
- bld.max_instructions
- * sizeof(struct tgsi_full_instruction),
- (bld.max_instructions + LP_MAX_INSTRUCTIONS)
- * sizeof(struct tgsi_full_instruction));
- if (!instructions) {
- break;
- }
- bld.instructions = instructions;
- bld.max_instructions += LP_MAX_INSTRUCTIONS;
- }
-
- memcpy(bld.instructions + num_instructions,
- &parse.FullToken.FullInstruction,
- sizeof(bld.instructions[0]));
-
- num_instructions++;
- }
-
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* simply copy the immediate values into the next immediates[] slot */
- {
- const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
- assert(size <= 4);
- assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
- for( i = 0; i < size; ++i )
- bld.immediates[num_immediates][i] =
- lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
- for( i = size; i < 4; ++i )
- bld.immediates[num_immediates][i] = bld.base.undef;
- num_immediates++;
- }
- break;
-
- case TGSI_TOKEN_TYPE_PROPERTY:
- break;
-
- default:
- assert( 0 );
- }
- }
-
- while (pc != -1) {
- struct tgsi_full_instruction *instr = bld.instructions + pc;
- const struct tgsi_opcode_info *opcode_info =
- tgsi_get_opcode_info(instr->Instruction.Opcode);
- if (!emit_instruction( &bld, instr, opcode_info, &pc ))
- _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- opcode_info->mnemonic);
- }
-
- /* If we have indirect addressing in outputs we need to copy our alloca array
- * to the outputs slots specified by the called */
- if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
- unsigned index, chan;
- assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
- for (index = 0; index < info->num_outputs; ++index) {
- for (chan = 0; chan < NUM_CHANNELS; ++chan) {
- bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
- }
- }
- }
+ lp_build_tgsi_llvm(&bld.bld_base, tokens);
if (0) {
LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
@@ -2504,7 +1820,6 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
lp_debug_dump_value(function);
debug_printf("2222222222222222222222222222 \n");
}
- tgsi_parse_free( &parse );
if (0) {
LLVMModuleRef module = LLVMGetGlobalParent(
@@ -2512,8 +1827,6 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
LLVMDumpModule(module);
}
-
- FREE( bld.instructions );
}
--
1.7.3.4
More information about the mesa-dev
mailing list