[Mesa-dev] [PATCH 6/6] gallivm: Add a new interface for doing TGSI->LLVM conversions
Jose Fonseca
jfonseca at vmware.com
Mon Jan 16 11:27:55 PST 2012
Tom,
Looks good in principle!
But I need to test this myself before I can be comfortable with merging it into master.
Just a few quick comments inline.
Jose
----- Original Message -----
> From: Tom Stellard <thomas.stellard at amd.com>
>
> lp_bld_tgsi_soa.c has been adapted to use this new interface, but
> lp_bld_tgsi_aos.c has only been partially adapted, since nothing in
> gallium currently uses it.
There are some closed-source users of lp_bld_tgsi_aos.c. The rationale for open-sourcing it was to eventually enable using AoS in draw_llvm (as vertex shaders are usually more suitable for AoS), but that has not happened yet.
Will lp_bld_tgsi_aos.c run correctly with this?
If it is not useful for the r600g driver, then it might be better to leave lp_bld_tgsi_aos.c untouched for the time being. (I can do the conversion when I merge this into our private repositories.)
> ---
> src/gallium/auxiliary/Makefile.sources | 2 +
> src/gallium/auxiliary/gallivm/lp_bld_action.c | 1182
> ++++++++++++++
> src/gallium/auxiliary/gallivm/lp_bld_action.h | 138 ++
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 409 +++++
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 341 ++++-
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 551 +++----
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 1981
> ++++++++---------------
> 7 files changed, 2952 insertions(+), 1652 deletions(-)
> create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_action.c
> create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_action.h
> create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
>
> diff --git a/src/gallium/auxiliary/Makefile.sources
> b/src/gallium/auxiliary/Makefile.sources
> index f55a4eb..547f63d 100644
> --- a/src/gallium/auxiliary/Makefile.sources
> +++ b/src/gallium/auxiliary/Makefile.sources
> @@ -155,6 +155,7 @@ GENERATED_SOURCES := \
> util/u_half.c
>
> GALLIVM_SOURCES := \
> + gallivm/lp_bld_action.c \
> gallivm/lp_bld_arit.c \
> gallivm/lp_bld_assert.c \
> gallivm/lp_bld_bitarit.c \
> @@ -176,6 +177,7 @@ GALLIVM_SOURCES := \
> gallivm/lp_bld_sample_soa.c \
> gallivm/lp_bld_struct.c \
> gallivm/lp_bld_swizzle.c \
> + gallivm/lp_bld_tgsi.c \
> gallivm/lp_bld_tgsi_aos.c \
> gallivm/lp_bld_tgsi_info.c \
> gallivm/lp_bld_tgsi_soa.c \
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_action.c
> b/src/gallium/auxiliary/gallivm/lp_bld_action.c
> new file mode 100644
> index 0000000..0b6cc77
> --- /dev/null
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_action.c
I'd prefer a source file name that clearly states it is TGSI-related, for example lp_bld_tgsi_soa_action.c.
> @@ -0,0 +1,1182 @@
> +/**************************************************************************
> + *
> + * Copyright 2010-2011 Advanced Micro Devices, Inc.
> + * Copyright 2009 VMware, Inc.
> + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction,
> including
> + * without limitation the rights to use, copy, modify, merge,
> publish,
> + * distribute, sub license, and/or sell copies of the Software, and
> to
> + * permit persons to whom the Software is furnished to do so,
> subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the
> + * next paragraph) shall be included in all copies or substantial
> portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> NON-INFRINGEMENT.
> + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE
> LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
> CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> +
> **************************************************************************/
> +
> +/**
> + * @file
> + * TGSI to LLVM IR translation.
> + *
> + * @author Jose Fonseca <jfonseca at vmware.com>
> + * @author Tom Stellard <thomas.stellard at amd.com>
> + *
> + * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
> + * Brian Paul, and others.
> + */
> +
> +
> +#include "lp_bld_action.h"
> +
> +#include "lp_bld_tgsi.h"
> +#include "lp_bld_arit.h"
> +#include "lp_bld_const.h"
> +#include "lp_bld_gather.h"
> +#include "lp_bld_logic.h"
> +
> +#include "tgsi/tgsi_exec.h"
> +
> +/* XXX: The CPU only defaults should be replaced by generic ones. In
> most
> + * cases, the CPU defaults are just wrappers around a function in
> + * lp_bld_arit.c and these functions should be inlined here and
> the CPU
> + * generic code should be removed and placed elsewhere.
> + */
> +
> +/* Default actions */
> +
> +/* Generic fetch_arg functions */
> +
> +static void scalar_unary_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + /* src0.x */
> + emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst, 0, 0);
> + emit_data->arg_count = 1;
> + emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
> +}
> +
> +static void scalar_binary_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + /* src0.x */
> + emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 0, TGSI_CHAN_X);
> + /* src1.x */
> + emit_data->args[1] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 1, TGSI_CHAN_X);
> + emit_data->arg_count = 2;
> + emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_ADD */
> +static void
> +add_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = LLVMBuildFAdd(
> + bld_base->base.gallivm->builder,
> + emit_data->args[0],
> emit_data->args[1], "");
> +}
> +
> +/* TGSI_OPCODE_ARR */
> +static void
> +arr_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_unary(bld_base,
> + TGSI_OPCODE_ROUND,
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_CLAMP */
> +static void
> +clamp_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp;
> + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
> + emit_data->args[0],
> + emit_data->args[1]);
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_MIN, tmp,
> emit_data->args[2]);
> +}
> +
> +/* DP* Helper */
> +
> +static void
> +dp_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data,
> + unsigned dp_components)
> +{
> + unsigned chan, src;
> + for (src = 0; src < 2; src++) {
> + for (chan = 0; chan < dp_components; chan++) {
> + emit_data->args[(src * dp_components) + chan] =
> + lp_build_emit_fetch(bld_base, emit_data->inst,
> src, chan);
> + }
> + }
> + emit_data->dst_type = bld_base->base.elem_type;
> +}
> +
> +/* TGSI_OPCODE_DP2 */
> +static void
> +dp2_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + dp_fetch_args(bld_base, emit_data, 2);
> +}
> +
> +static void
> +dp2_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp0, tmp1;
> + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[0] /* src0.x */,
> + emit_data->args[2] /* src1.x
> */);
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[1] /* src0.y */,
> + emit_data->args[3] /* src1.y
> */);
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_ADD,
> tmp0, tmp1);
> +}
> +
> +static struct lp_build_opcode_action dp2_action = {
> + .fetch_args = dp2_fetch_args,
> + .emit = dp2_emit
> +};
> +
> +/* TGSI_OPCODE_DP2A */
> +static void
> +dp2a_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + dp_fetch_args(bld_base, emit_data, 2);
> + emit_data->args[5] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 2, TGSI_CHAN_X);
> +}
> +
> +static void
> +dp2a_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp;
> + tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
> + emit_data->args[5], tmp);
> +}
> +
> +static struct lp_build_opcode_action dp2a_action = {
> + .fetch_args = dp2a_fetch_args,
> + .emit = dp2a_emit
> +};
> +
> +/* TGSI_OPCODE_DP3 */
> +static void
> +dp3_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + dp_fetch_args(bld_base, emit_data, 3);
> +}
> +
> +static void
> +dp3_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp0, tmp1;
> + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[0] /* src0.x */,
> + emit_data->args[3] /* src1.x
> */);
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[1] /* src0.y */,
> + emit_data->args[4] /* src1.y
> */);
> + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1,
> tmp0);
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[2] /* src0.z */,
> + emit_data->args[5] /* src1.z
> */);
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_ADD,
> tmp0, tmp1);
> +}
> +
> +static struct lp_build_opcode_action dp3_action = {
> + .fetch_args = dp3_fetch_args,
> + .emit = dp3_emit
> +};
> +
> +/* TGSI_OPCODE_DP4 */
> +
> +static void
> +dp4_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + dp_fetch_args(bld_base, emit_data, 4);
> +}
> +
> +static void
> +dp4_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp0, tmp1;
> + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[0] /* src0.x */,
> + emit_data->args[4] /* src1.x
> */);
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[1] /* src0.y */,
> + emit_data->args[5] /* src1.y
> */);
> + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0,
> tmp1);
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[2] /* src0.z */,
> + emit_data->args[6] /* src1.z
> */);
> + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0,
> tmp1);
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[3] /* src0.w */,
> + emit_data->args[7] /* src1.w
> */);
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_ADD,
> tmp0, tmp1);
> +}
> +
> +static struct lp_build_opcode_action dp4_action = {
> + .fetch_args = dp4_fetch_args,
> + .emit = dp4_emit
> +};
> +
> +/* TGSI_OPCODE_DPH */
> +static void
> +dph_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + dp_fetch_args(bld_base, emit_data, 4);
> + /* src0.w */
> + emit_data->args[3] = bld_base->base.one;
> +}
> +
> +const struct lp_build_opcode_action dph_action = {
> + .fetch_args = dph_fetch_args,
> + .emit = dp4_emit
> +};
> +
> +/* TGSI_OPCODE_DST */
> +static void
> +dst_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + /* src0.y */
> + emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 0, TGSI_CHAN_Y);
> + /* src0.z */
> + emit_data->args[1] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 0, TGSI_CHAN_Z);
> + /* src1.y */
> + emit_data->args[2] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 1, TGSI_CHAN_Y);
> + /* src1.w */
> + emit_data->args[3] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 1, TGSI_CHAN_W);
> +}
> +
> +static void
> +dst_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + /* dst.x */
> + emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
> +
> + /* dst.y */
> + emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_MUL,
> + emit_data->args[0] /*
> src0.y */,
> + emit_data->args[2] /*
> src1.y */);
> + /* dst.z */
> + emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */
> +
> + /* dst.w */
> + emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */
> +}
> +
> +static struct lp_build_opcode_action dst_action = {
> + .fetch_args = dst_fetch_args,
> + .emit = dst_emit
> +};
> +
> +/* TGSI_OPCODE_END */
> +static void
> +end_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + bld_base->pc = -1;
> +}
> +
> +/* TGSI_OPCODE_EXP */
> +
> +static void
> +exp_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef floor_x;
> +
> + /* floor( src0.x ) */
> + floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
> + emit_data->args[0]);
> +
> + /* 2 ^ floor( src0.x ) */
> + emit_data->output[TGSI_CHAN_X] =
> lp_build_emit_llvm_unary(bld_base,
> + TGSI_OPCODE_EX2, floor_x);
> +
> + /* src0.x - floor( src0.x ) */
> + emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_SUB, emit_data->args[0] /* src0.x
> */, floor_x);
> +
> + /* 2 ^ src0.x */
> + emit_data->output[TGSI_CHAN_Z] =
> lp_build_emit_llvm_unary(bld_base,
> + TGSI_OPCODE_EX2, emit_data->args[0] /*
> src0.x */);
> +
> + emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +const struct lp_build_opcode_action exp_action = {
> + .fetch_args = scalar_unary_fetch_args,
> + .emit = exp_emit
> +};
> +
> +/* TGSI_OPCODE_FRC */
> +
> +static void
> +frc_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp;
> + tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
> + emit_data->args[0]);
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_SUB,
> emit_data->args[0], tmp);
> +}
> +
> +/* TGSI_OPCODE_KIL */
> +
> +static void
> +kil_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + /* src0.x */
> + emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 0, TGSI_CHAN_X);
> + /* src0.y */
> + emit_data->args[1] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 0, TGSI_CHAN_Y);
> + /* src0.z */
> + emit_data->args[2] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 0, TGSI_CHAN_Z);
> + /* src0.w */
> + emit_data->args[3] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> + 0, TGSI_CHAN_W);
> + emit_data->arg_count = 4;
> + emit_data->dst_type =
> LLVMVoidTypeInContext(bld_base->base.gallivm->context);
> +}
> +
> +/* TGSI_OPCODE_KILP */
> +
> +static void
> +kilp_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->dst_type =
> LLVMVoidTypeInContext(bld_base->base.gallivm->context);
> +}
> +
> +/* TGSI_OPCODE_LIT */
> +
> +static void
> +lit_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + /* src0.x */
> + emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst, 0, TGSI_CHAN_X);
> + /* src0.y */
> + emit_data->args[1] = lp_build_emit_fetch(bld_base,
> emit_data->inst, 0, TGSI_CHAN_Y);
> + /* src0.w */
> + emit_data->args[2] = lp_build_emit_fetch(bld_base,
> emit_data->inst, 0, TGSI_CHAN_W);
> + emit_data->arg_count = 3;
> +}
> +
> +static void
> +lit_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp0, tmp2;
> +
> + /* dst.x */
> + emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
> +
> + /* dst. y */
> + emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_MAX,
> + emit_data->args[0] /*
> src0.x */,
> + bld_base->base.zero);
> +
> + /* dst.z */
> + /* XMM[1] = SrcReg[0].yyyy */
> + LLVMValueRef tmp1 = emit_data->args[1];
> + /* XMM[1] = max(XMM[1], 0) */
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
> + tmp1, bld_base->base.zero);
> + /* XMM[2] = SrcReg[0].wwww */
> + tmp2 = emit_data->args[2];
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
> + tmp1, tmp2);
> + tmp0 = emit_data->args[0];
> + emit_data->output[TGSI_CHAN_Z] =
> lp_build_emit_llvm_ternary(bld_base,
> + TGSI_OPCODE_CMP,
> + tmp0,
> bld_base->base.zero, tmp1);
> + /* dst.w */
> + emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +static struct lp_build_opcode_action lit_action = {
> + .fetch_args = lit_fetch_args,
> + .emit = lit_emit
> +};
> +
> +/* TGSI_OPCODE_LOG */
> +
> +static void
> +log_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> +
> + LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
> +
> + /* abs( src0.x) */
> + abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
> + emit_data->args[0] /* src0.x
> */);
> +
> + /* log( abs( src0.x ) ) */
> + log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
> + abs_x);
> +
> + /* floor( log( abs( src0.x ) ) ) */
> + flr_log_abs_x = lp_build_emit_llvm_unary(bld_base,
> TGSI_OPCODE_FLR,
> + log_abs_x);
> + /* dst.x */
> + emit_data->output[TGSI_CHAN_X] = flr_log_abs_x;
> +
> + /* dst.y */
> + ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base,
> TGSI_OPCODE_EX2,
> + flr_log_abs_x);
> +
> + /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
> + emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_DIV, abs_x,
> ex2_flr_log_abs_x);
> +
> + /* dst.z */
> + emit_data->output[TGSI_CHAN_Z] = log_abs_x;
> +
> + /* dst.w */
> + emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +static struct lp_build_opcode_action log_action = {
> + .fetch_args = scalar_unary_fetch_args,
> + .emit = log_emit
> +};
> +
> +/* TGSI_OPCODE_LRP */
> +
> +static void
> +lrp_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp;
> + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB,
> + emit_data->args[1],
> + emit_data->args[2]);
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_ternary(bld_base,
> + TGSI_OPCODE_MAD, emit_data->args[0], tmp,
> emit_data->args[2]);
> +}
> +
> +/* TGSI_OPCODE_MAD */
> +
> +static void
> +mad_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp;
> + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> + emit_data->args[0],
> + emit_data->args[1]);
> + emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_ADD, tmp,
> emit_data->args[2]);
> +}
> +
> +/* TGSI_OPCODE_MOV */
> +
> +static void
> +mov_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = emit_data->args[0];
> +}
> +
> +/* TGSI_OPCODE_MUL */
> +static void
> +mul_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_mul(&bld_base->base,
> + emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_POW */
> +
> +static void
> +pow_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_pow(&bld_base->base,
> + emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +static struct lp_build_opcode_action pow_action = {
> + .fetch_args = scalar_binary_fetch_args,
> + .emit = pow_emit
> +};
> +
> +/* TGSI_OPCODE_RSQ */
> +
> +static void
> +rsq_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->args[0] = lp_build_emit_llvm_unary(bld_base,
> TGSI_OPCODE_ABS,
> + emit_data->args[0]);
> + if (bld_base->rsq_action.emit) {
> + bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base,
> emit_data);
> + } else {
> + emit_data->output[emit_data->chan] = bld_base->base.undef;
> + }
> +}
> +
> +const struct lp_build_opcode_action rsq_action = {
> + .fetch_args = scalar_unary_fetch_args,
> + .emit = rsq_emit
> +
> +};
> +
> +/* TGSI_OPCODE_SCS */
> +static void
> +scs_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + /* dst.x */
> + emit_data->output[TGSI_CHAN_X] =
> lp_build_emit_llvm_unary(bld_base,
> + TGSI_OPCODE_COS,
> emit_data->args[0]);
> + /* dst.y */
> + emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_unary(bld_base,
> + TGSI_OPCODE_SIN,
> emit_data->args[0]);
> + /* dst.z */
> + emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
> +
> + /* dst.w */
> + emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +const struct lp_build_opcode_action scs_action = {
> + .fetch_args = scalar_unary_fetch_args,
> + .emit = scs_emit
> +};
> +
> +/* TGSI_OPCODE_SFL */
> +
> +static void
> +sfl_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = bld_base->base.zero;
> +}
> +
> +/* TGSI_OPCODE_STR */
> +
> +static void
> +str_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = bld_base->base.one;
> +}
> +
> +/* TGSI_OPCODE_SUB */
> +static void
> +sub_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] = LLVMBuildFSub(
> + bld_base->base.gallivm->builder,
> + emit_data->args[0],
> + emit_data->args[1], "");
> +}
> +
> +/* TGSI_OPCODE_XPD */
> +
> +static void
> +xpd_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + dp_fetch_args(bld_base, emit_data, 3);
> +}
> +
> +/**
> + * (a * b) - (c * d)
> + */
> +static LLVMValueRef
> +xpd_helper(
> + struct lp_build_tgsi_context * bld_base,
> + LLVMValueRef a,
> + LLVMValueRef b,
> + LLVMValueRef c,
> + LLVMValueRef d)
> +{
> + LLVMValueRef tmp0, tmp1;
> +
> + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a,
> b);
> + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c,
> d);
> +
> + return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0,
> tmp1);
> +}
> +
> +static void
> +xpd_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
> + emit_data->args[1] /* src0.y */, emit_data->args[5] /*
> src1.z */,
> + emit_data->args[4] /* src1.y */, emit_data->args[2] /*
> src0.z */);
> +
> + emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
> + emit_data->args[2] /* src0.z */, emit_data->args[3] /*
> src1.x */,
> + emit_data->args[5] /* src1.z */, emit_data->args[0] /*
> src0.x */);
> +
> + emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
> + emit_data->args[0] /* src0.x */, emit_data->args[4] /*
> src1.y */,
> + emit_data->args[3] /* src1.x */, emit_data->args[1] /*
> src0.y */);
> +
> + emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +const struct lp_build_opcode_action xpd_action = {
> + .fetch_args = xpd_fetch_args,
> + .emit = xpd_emit
> +};
> +
> +void
> +lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
> +{
> + bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
> + bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
> + bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
> + bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
> + bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
> + bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
> + bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
> + bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
> + bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
> + bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
> + bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
> + bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
> + bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
> +
> + bld_base->op_actions[TGSI_OPCODE_COS].fetch_args =
> scalar_unary_fetch_args;
> + bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args =
> scalar_unary_fetch_args;
> + bld_base->op_actions[TGSI_OPCODE_IF].fetch_args =
> scalar_unary_fetch_args;
> + bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args =
> kil_fetch_args;
> + bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args =
> kilp_fetch_args;
> + bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args =
> scalar_unary_fetch_args;
> + bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args =
> scalar_unary_fetch_args;
> + bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args =
> scalar_unary_fetch_args;
> +
> + bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit;
> + bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit;
> + bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit;
> + bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit;
> + bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit;
> + bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit;
> + bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit;
> + bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit;
> + bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
> + bld_base->op_actions[TGSI_OPCODE_SFL].emit = sfl_emit;
> + bld_base->op_actions[TGSI_OPCODE_STR].emit = str_emit;
> + bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
> +}
> +
> +/* CPU Only default actions */
> +
> +/* These actions are CPU only, because they could potentially output
> SSE
> + * intrinsics.
> + */
> +
> +/* TGSI_OPCODE_ABS (CPU Only)*/
> +
> +static void
> +abs_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_abs(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_ADD (CPU Only) */
> +static void
> +add_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_add(&bld_base->base,
> + emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_CEIL (CPU Only) */
> +static void
> +ceil_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_trunc(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_CMP (CPU Only) */
> +static void
> +cmp_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
> + emit_data->args[0],
> bld_base->base.zero);
> + emit_data->output[emit_data->chan] =
> lp_build_select(&bld_base->base,
> + cond, emit_data->args[1],
> emit_data->args[2]);
> +}
> +
> +/* TGSI_OPCODE_CND (CPU Only) */
> +static void
> +cnd_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef half, tmp;
> + half = lp_build_const_vec(bld_base->base.gallivm,
> bld_base->base.type, 0.5);
> + tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_GREATER,
> + emit_data->args[2], half);
> + emit_data->output[emit_data->chan] =
> lp_build_select(&bld_base->base,
> + tmp,
> + emit_data->args[0],
> + emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_COS (CPU Only) */
> +static void
> +cos_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_cos(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_DIV (CPU Only) */
> +static void
> +div_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_div(&bld_base->base,
> + emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_EX2 (CPU Only) */
> +static void
> +ex2_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_exp2(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_EXP (CPU Only) */
> +static void
> +exp_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
> + &emit_data->output[TGSI_CHAN_X],
> + &emit_data->output[TGSI_CHAN_Y],
> + &emit_data->output[TGSI_CHAN_Z]);
> + emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +/* TGSI_OPCODE_FLR (CPU Only) */
> +
> +static void
> +flr_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_floor(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_LG2 (CPU Only) */
> +static void
> +lg2_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_log2(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_LOG (CPU Only) */
> +static void
> +log_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef p_floor_log2;
> + LLVMValueRef p_exp;
> + LLVMValueRef p_log2;
> + LLVMValueRef src0 = emit_data->args[0];
> +
> + lp_build_log2_approx(&bld_base->base, src0,
> + &p_exp, &p_floor_log2, &p_log2);
> +
> + emit_data->output[TGSI_CHAN_X] = p_floor_log2;
> +
> + emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_DIV,
> + src0, p_exp);
> + emit_data->output[TGSI_CHAN_Z] = p_log2;
> +
> + emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +
> +}
> +
> +/* TGSI_OPCODE_MAX (CPU Only) */
> +
> +static void
> +max_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_max(&bld_base->base,
> + emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_MIN (CPU Only) */
> +static void
> +min_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_min(&bld_base->base,
> + emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_POW (CPU Only) */
> +static void
> +pow_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_pow(&bld_base->base,
> + emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +
> +/* TGSI_OPCODE_RCP (CPU Only) */
> +
> +static void
> +rcp_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_rcp(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* Reciprocal square root (CPU Only) */
> +
> +/* This is not the same as TGSI_OPCODE_RSQ, which requires the
> argument to be
> + * greater than or equal to 0 */
> +static void
> +recip_sqrt_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_rsqrt(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_ROUND (CPU Only) */
> +static void
> +round_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_round(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_SET Helper (CPU Only) */
> +
> +static void
> +set_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data,
> + unsigned pipe_func)
> +{
> + LLVMValueRef cond = lp_build_cmp(&bld_base->base, pipe_func,
> + emit_data->args[0],
> emit_data->args[1]);
> + emit_data->output[emit_data->chan] =
> lp_build_select(&bld_base->base,
> + cond,
> + bld_base->base.one,
> + bld_base->base.zero);
> +}
> +
> +/* TGSI_OPCODE_SEQ (CPU Only) */
> +
> +static void
> +seq_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
> +}
> +
> +/* TGSI_OPCODE_SGE (CPU Only) */
> +static void
> +sge_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
> +}
> +
> +/* TGSI_OPCODE_SGT (CPU Only)*/
> +
> +static void
> +sgt_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
> +}
> +
> +/* TGSI_OPCODE_SIN (CPU Only) */
> +static void
> +sin_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_sin(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_SLE (CPU Only) */
> +static void
> +sle_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
> +}
> +
> +/* TGSI_OPCODE_SLT (CPU Only) */
> +
> +static void
> +slt_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
> +}
> +
> +/* TGSI_OPCODE_SNE (CPU Only) */
> +
> +static void
> +sne_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
> +}
> +
> +/* TGSI_OPCODE_SSG (CPU Only) */
> +
> +static void
> +ssg_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_sgn(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_SUB (CPU Only) */
> +
> +static void
> +sub_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_sub(&bld_base->base,
> +
> emit_data->args[0],
> +
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_TRUNC (CPU Only) */
> +
> +static void
> +trunc_emit_cpu(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + emit_data->output[emit_data->chan] =
> lp_build_trunc(&bld_base->base,
> +
> emit_data->args[0]);
> +}
> +
> +void
> +lp_set_default_actions_cpu(
> + struct lp_build_tgsi_context * bld_base)
> +{
> + lp_set_default_actions(bld_base);
> + bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_ARL].emit = flr_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_CND].emit = cnd_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
> +
> + bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
> +}
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_action.h
> b/src/gallium/auxiliary/gallivm/lp_bld_action.h
> new file mode 100644
> index 0000000..5495a86
> --- /dev/null
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_action.h
> @@ -0,0 +1,138 @@
> +/*
> + * Copyright 2010-2011 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction,
> including
> + * without limitation the rights to use, copy, modify, merge,
> publish,
> + * distribute, sub license, and/or sell copies of the Software, and
> to
> + * permit persons to whom the Software is furnished to do so,
> subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the
> + * next paragraph) shall be included in all copies or substantial
> portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> NON-INFRINGEMENT.
> + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE
> LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
> CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> +
> **************************************************************************/
> +
> +/**
> + *
> + * @author Tom Stellard <thomas.stellard at amd.com>
> + *
> + */
> +
> +
> +#ifndef LP_BLD_ACTION_H
> +#define LP_BLD_ACTION_H
> +
> +#include <llvm-c/Core.h>
> +
> +struct lp_build_tgsi_context;
> +
> +struct lp_build_emit_data {
> + /** Arguments that are passed to lp_build_opcode_action::emit.
> The
> + * order of the arguments should be as follows:
> + * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x,
> s2.y, s2.z, s2.w
> + * AOS: s0.xyzw, s1.xyzw, s2.xyzw
> + * TEXTURE Instructions: coord.xyzw
> + *
> + * Arguments should be packed into the args array. For example
> an SOA
> + * instruction that reads s0.x and s1.x args should look like
> this:
> + * args[0] = s0.x;
> + * args[1] = s1.x;
> + */
> + LLVMValueRef args[12];
> +
> + /**
> + * Number of arguments in the args array.
> + */
> + unsigned arg_count;
> +
> + /**
> + * The output type of the opcode. This should be set in the
> + * lp_build_opcode_action::fetch_args function.
> + */
> + LLVMTypeRef dst_type;
> +
> + /** This is used by the lp_build_opcode_action::fetch_args
> function to
> + * determine which channel to read from the opcode arguments. It
> also
> + * specifies which index of the output array should be written to
> by
> + * the lp_build_opcode_action::emit function. However, this
> value is
> + * usually ignored by any opcodes that are not
> TGSI_OUTPUT_COMPONENTWISE.
> + */
> + unsigned chan;
> +
> + /** The lp_build_opcode_action::emit 'executes' the opcode and
> writes the
> + * results to this array.
> + */
> + LLVMValueRef output[4];
> +
> + /**
> + * The current instruction that is being 'executed'.
> + */
> + const struct tgsi_full_instruction * inst;
> + const struct tgsi_opcode_info * info;
> +};
> +
> +struct lp_build_opcode_action
> +{
> +
> + /**
> + * This function is responsible for doing 2-3 things:
> + * 1. Fetching the instruction arguments into the emit_data->args
> array.
> + * 2. Setting the number of arguments in emit_data->arg_count.
> + * 3. Setting the destination type in emit_data->dst_type
> (usually only
> + * necessary for opcodes that are TGSI_OUTPUT_COMPONENTWISE).
> + */
> + void (*fetch_args)(struct lp_build_tgsi_context *,
> + struct lp_build_emit_data *);
> +
> +
> + /**
> + * This function is responsible for emitting LLVM IR for a TGSI
> opcode.
> + * It should store the values it generates in the
> emit_data->output array
> + * and for TGSI_OUTPUT_COMPONENTWISE and TGSI_OUTPUT_REPLICATE
> instructions
> + * (and possibly others depending on the specific
> implementation), it should
> + * make sure to store the values in the array slot indexed by
> emit_data->chan.
> + */
> + void (*emit)(const struct lp_build_opcode_action *,
> + struct lp_build_tgsi_context *,
> + struct lp_build_emit_data *);
> +
> + /**
> + * This variable can be used to store an intrinsic name, in case
> the TGSI
> + * opcode will be replaced by a target specific intrinsic.
> (There is a
> + * convenience function in lp_bld_tgsi.c called
> lp_build_tgsi_intrinsic()
> + * that can be assigned to lp_build_opcode_action::emit and used
> for
> + * generating intrinsics).
> + */
> + const char * intr_name;
> +};
> +
> +/**
> + * This function initializes the bld_base->op_actions array with
> some
> + * generic operand actions.
> + */
> +void
> +lp_set_default_actions(
> + struct lp_build_tgsi_context * bld_base);
> +
> +/*
> + * This function initializes the bld_base->op_actions array with some
> + * operand actions that are intended only for use when generating
> + * instructions to be executed on a CPU.
> + */
> +void
> +lp_set_default_actions_cpu(
> + struct lp_build_tgsi_context * bld_base);
> +
> +#endif /* LP_BLD_ACTION_H */
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> new file mode 100644
> index 0000000..9ec4a9b
> --- /dev/null
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -0,0 +1,409 @@
> +/**************************************************************************
> + *
> + * Copyright 2010-2011 Advanced Micro Devices, Inc.
> + * Copyright 2010 VMware, Inc.
> + * Copyright 2009 VMware, Inc.
> + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction,
> including
> + * without limitation the rights to use, copy, modify, merge,
> publish,
> + * distribute, sub license, and/or sell copies of the Software, and
> to
> + * permit persons to whom the Software is furnished to do so,
> subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the
> + * next paragraph) shall be included in all copies or substantial
> portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> NON-INFRINGEMENT.
> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
> CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> +
> **************************************************************************/
> +
> +#include "gallivm/lp_bld_tgsi.h"
> +
> +#include "gallivm/lp_bld_arit.h"
> +#include "gallivm/lp_bld_gather.h"
> +#include "gallivm/lp_bld_init.h"
> +#include "gallivm/lp_bld_intr.h"
> +#include "tgsi/tgsi_info.h"
> +#include "tgsi/tgsi_parse.h"
> +#include "tgsi/tgsi_util.h"
> +#include "util/u_memory.h"
> +
> +/* The user is responsible for freeing bld_base->instructions */
> +unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context *
> bld_base)
> +{
> + bld_base->instructions = (struct tgsi_full_instruction *)
> + MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct
> tgsi_full_instruction) );
> + if (!bld_base->instructions) {
> + return 0;
> + }
> + bld_base->max_instructions = LP_MAX_INSTRUCTIONS;
> + return 1;
> +}
> +
> +
> +unsigned lp_bld_tgsi_add_instruction(
> + struct lp_build_tgsi_context * bld_base,
> + struct tgsi_full_instruction *inst_to_add)
> +{
> +
> + if (bld_base->num_instructions == bld_base->max_instructions) {
> + struct tgsi_full_instruction *instructions;
> + instructions = REALLOC(bld_base->instructions,
> bld_base->max_instructions
> + * sizeof(struct
> tgsi_full_instruction),
> + (bld_base->max_instructions +
> LP_MAX_INSTRUCTIONS)
> + * sizeof(struct
> tgsi_full_instruction));
> + if (!instructions) {
> + return 0;
> + }
> + bld_base->instructions = instructions;
> + bld_base->max_instructions += LP_MAX_INSTRUCTIONS;
> + }
> + memcpy(bld_base->instructions + bld_base->num_instructions,
> inst_to_add,
> + sizeof(bld_base->instructions[0]));
> +
> + bld_base->num_instructions++;
> +
> + return 1;
> +}
> +
> +
> +/**
> + * This function assumes that all the args in emit_data have been
> set.
> + */
> +static void
> +lp_build_action_set_dst_type(
> + struct lp_build_emit_data * emit_data,
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode)
> +{
> + if (emit_data->arg_count == 0) {
> + emit_data->dst_type =
> LLVMVoidTypeInContext(bld_base->base.gallivm->context);
> + } else {
> + /* XXX: Not all opcodes have the same src and dst types. */
> + emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
> + }
> +}
> +
> +void
> +lp_build_tgsi_intrinsic(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context * base = &bld_base->base;
> + emit_data->output[emit_data->chan] = lp_build_intrinsic(
> + base->gallivm->builder, action->intr_name,
> + emit_data->dst_type, emit_data->args,
> emit_data->arg_count);
> +}
> +
> +LLVMValueRef
> +lp_build_emit_llvm(
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_opcode_action * action =
> &bld_base->op_actions[tgsi_opcode];
> + /* XXX: Assert that this is a componentwise or replicate
> instruction */
> +
> + lp_build_action_set_dst_type(emit_data, bld_base, tgsi_opcode);
> + emit_data->chan = 0;
> + assert(action->emit);
> + action->emit(action, bld_base, emit_data);
> + return emit_data->output[0];
> +}
> +
> +LLVMValueRef
> +lp_build_emit_llvm_unary(
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode,
> + LLVMValueRef arg0)
> +{
> + struct lp_build_emit_data emit_data;
> + emit_data.arg_count = 1;
> + emit_data.args[0] = arg0;
> + return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
> +}
> +
> +LLVMValueRef
> +lp_build_emit_llvm_binary(
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode,
> + LLVMValueRef arg0,
> + LLVMValueRef arg1)
> +{
> + struct lp_build_emit_data emit_data;
> + emit_data.arg_count = 2;
> + emit_data.args[0] = arg0;
> + emit_data.args[1] = arg1;
> + return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
> +}
> +
> +LLVMValueRef
> +lp_build_emit_llvm_ternary(
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode,
> + LLVMValueRef arg0,
> + LLVMValueRef arg1,
> + LLVMValueRef arg2)
> +{
> + struct lp_build_emit_data emit_data;
> + emit_data.arg_count = 3;
> + emit_data.args[0] = arg0;
> + emit_data.args[1] = arg1;
> + emit_data.args[2] = arg2;
> + return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
> +}
> +
> +/**
> + * The default fetch implementation.
> + */
> +void lp_build_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + unsigned src;
> + for (src = 0; src < emit_data->info->num_src; src++) {
> + emit_data->args[src] = lp_build_emit_fetch(bld_base,
> emit_data->inst, src,
> + emit_data->chan);
> + }
> + emit_data->arg_count = emit_data->info->num_src;
> + lp_build_action_set_dst_type(emit_data, bld_base,
> + emit_data->inst->Instruction.Opcode);
> +}
> +
> +/* XXX: COMMENT
> + * It should be assumed that this function ignores writemasks
> + */
> +boolean
> +lp_build_tgsi_inst_llvm(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_instruction * inst)
> +{
> + unsigned tgsi_opcode = inst->Instruction.Opcode;
> + const struct tgsi_opcode_info * info =
> tgsi_get_opcode_info(tgsi_opcode);
> + const struct lp_build_opcode_action * action =
> +
> &bld_base->op_actions[tgsi_opcode];
> + struct lp_build_emit_data emit_data;
> + unsigned chan_index;
> + LLVMValueRef val;
> +
> + bld_base->pc++;
> +
> + /* Ignore deprecated instructions */
> + switch (inst->Instruction.Opcode) {
> +
> + case TGSI_OPCODE_RCC:
> + case TGSI_OPCODE_UP2H:
> + case TGSI_OPCODE_UP2US:
> + case TGSI_OPCODE_UP4B:
> + case TGSI_OPCODE_UP4UB:
> + case TGSI_OPCODE_X2D:
> + case TGSI_OPCODE_ARA:
> + case TGSI_OPCODE_BRA:
> + case TGSI_OPCODE_DIV:
> + case TGSI_OPCODE_PUSHA:
> + case TGSI_OPCODE_POPA:
> + case TGSI_OPCODE_I2F:
> + case TGSI_OPCODE_NOT:
> + case TGSI_OPCODE_SHL:
> + case TGSI_OPCODE_ISHR:
> + case TGSI_OPCODE_AND:
> + case TGSI_OPCODE_OR:
> + case TGSI_OPCODE_MOD:
> + case TGSI_OPCODE_XOR:
> + case TGSI_OPCODE_SAD:
> + case TGSI_OPCODE_TXF:
> + case TGSI_OPCODE_TXQ:
> + /* deprecated? */
> + assert(0);
> + return FALSE;
> + break;
> + }
> +
> + /* Check if the opcode has been implemented */
> + if (!action->emit) {
> + return FALSE;
> + }
> +
> + memset(&emit_data, 0, sizeof(emit_data));
> +
> + assert(info->num_dst <= 1);
> + if (info->num_dst) {
> + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> + emit_data.output[chan_index] = bld_base->base.undef;
> + }
> + }
> +
> + emit_data.inst = inst;
> + emit_data.info = info;
> +
> + /* Emit the instructions */
> + if (info->output_type == TGSI_OUTPUT_COMPONENTWISE &&
> bld_base->soa) {
> + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
> + emit_data.chan = chan_index;
> + if (!action->fetch_args) {
> + lp_build_fetch_args(bld_base, &emit_data);
> + } else {
> + action->fetch_args(bld_base, &emit_data);
> + }
> + action->emit(action, bld_base, &emit_data);
> + }
> + } else {
> + emit_data.chan = LP_CHAN_ALL;
> + if (action->fetch_args) {
> + action->fetch_args(bld_base, &emit_data);
> + }
> + /* Make sure the output value is stored in
> emit_data.output[0], unless
> + * the opcode is channel dependent */
> + if (info->output_type != TGSI_OUTPUT_CHAN_DEPENDENT) {
> + emit_data.chan = 0;
> + }
> + action->emit(action, bld_base, &emit_data);
> +
> + /* Replicate the output values */
> + if (info->output_type == TGSI_OUTPUT_REPLICATE &&
> bld_base->soa) {
> + val = emit_data.output[0];
> + memset(emit_data.output, 0, sizeof(emit_data.output));
> + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
> + emit_data.output[chan_index] = val;
> + }
> + }
> + }
> +
> + if (info->num_dst > 0) {
> + bld_base->emit_store(bld_base, inst, info, emit_data.output);
> + }
> + return TRUE;
> +}
> +
> +
> +LLVMValueRef
> +lp_build_emit_fetch(
> + struct lp_build_tgsi_context *bld_base,
> + const struct tgsi_full_instruction *inst,
> + unsigned src_op,
> + const unsigned chan_index)
> +{
> + const struct tgsi_full_src_register *reg = &inst->Src[src_op];
> + unsigned swizzle;
> + LLVMValueRef res;
> +
> + if (chan_index == LP_CHAN_ALL) {
> + swizzle = ~0;
> + } else {
> + swizzle = tgsi_util_get_full_src_register_swizzle(reg,
> chan_index);
> + if (swizzle > 3) {
> + assert(0 && "invalid swizzle in emit_fetch()");
> + return bld_base->base.undef;
> + }
> + }
> +
> + assert(reg->Register.Index <=
> bld_base->info->file_max[reg->Register.File]);
> +
> + if (bld_base->emit_fetch_funcs[reg->Register.File]) {
> + res = bld_base->emit_fetch_funcs[reg->Register.File](bld_base,
> reg,
> + swizzle);
> + } else {
> + assert(0 && "invalid src register in emit_fetch()");
> + return bld_base->base.undef;
> + }
> +
> + if (reg->Register.Absolute) {
> + res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
> res);
> + }
> +
> + if (reg->Register.Negate) {
> + res = lp_build_negate( &bld_base->base, res );
> + }
> +
> + /*
> + * Swizzle the argument
> + */
> +
> + if (swizzle == ~0) {
> + res = bld_base->emit_swizzle(bld_base, res,
> + reg->Register.SwizzleX,
> + reg->Register.SwizzleY,
> + reg->Register.SwizzleZ,
> + reg->Register.SwizzleW);
> + }
> +
> + return res;
> +
> +}
> +
> +boolean
> +lp_build_tgsi_llvm(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_token *tokens)
> +{
> + struct tgsi_parse_context parse;
> +
> + if (bld_base->emit_prologue) {
> + bld_base->emit_prologue(bld_base);
> + }
> +
> + if (!lp_bld_tgsi_list_init(bld_base)) {
> + return FALSE;
> + }
> +
> + tgsi_parse_init( &parse, tokens );
> +
> + while( !tgsi_parse_end_of_tokens( &parse ) ) {
> + tgsi_parse_token( &parse );
> +
> + switch( parse.FullToken.Token.Type ) {
> + case TGSI_TOKEN_TYPE_DECLARATION:
> + /* Inputs already interpolated */
> + bld_base->emit_declaration(bld_base,
> &parse.FullToken.FullDeclaration);
> + break;
> +
> + case TGSI_TOKEN_TYPE_INSTRUCTION:
> + lp_bld_tgsi_add_instruction(bld_base,
> &parse.FullToken.FullInstruction);
> + break;
> +
> + case TGSI_TOKEN_TYPE_IMMEDIATE:
> + bld_base->emit_immediate(bld_base,
> &parse.FullToken.FullImmediate);
> + break;
> +
> + case TGSI_TOKEN_TYPE_PROPERTY:
> + break;
> +
> + default:
> + assert( 0 );
> + }
> + }
> +
> + while (bld_base->pc != -1) {
> + struct tgsi_full_instruction *instr = bld_base->instructions +
> + bld_base->pc;
> + const struct tgsi_opcode_info *opcode_info =
> + tgsi_get_opcode_info(instr->Instruction.Opcode);
> + if (!lp_build_tgsi_inst_llvm(bld_base, instr)) {
> + _debug_printf("warning: failed to translate tgsi opcode %s
> to LLVM\n",
> + opcode_info->mnemonic);
> + return FALSE;
> + }
> + }
> +
> + tgsi_parse_free(&parse);
> +
> + FREE(bld_base->instructions);
> +
> + if (bld_base->emit_epilogue) {
> + bld_base->emit_epilogue(bld_base);
> + }
> +
> + return TRUE;
> +}
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index 9713d10..3ca6719 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -1,5 +1,6 @@
> /**************************************************************************
> *
> + * Copyright 2010-2011 Advanced Micro Devices, Inc.
> * Copyright 2009 VMware, Inc.
> * All Rights Reserved.
> *
> @@ -30,21 +31,33 @@
> * TGSI to LLVM IR translation.
> *
> * @author Jose Fonseca <jfonseca at vmware.com>
> + * @author Tom Stellard <thomas.stellard at amd.com>
> */
>
> #ifndef LP_BLD_TGSI_H
> #define LP_BLD_TGSI_H
>
> #include "gallivm/lp_bld.h"
> +#include "gallivm/lp_bld_action.h"
> +#include "gallivm/lp_bld_limits.h"
> +#include "lp_bld_type.h"
> #include "pipe/p_compiler.h"
> #include "pipe/p_state.h"
> +#include "tgsi/tgsi_exec.h"
> #include "tgsi/tgsi_scan.h"
>
>
> +#define LP_CHAN_ALL ~0
> +
> +#define LP_MAX_INSTRUCTIONS 256
> +
> +struct tgsi_full_declaration;
> +struct tgsi_full_immediate;
> +struct tgsi_full_instruction;
> +struct tgsi_full_src_register;
> +struct tgsi_opcode_info;
> struct tgsi_token;
> struct tgsi_shader_info;
> -struct lp_type;
> -struct lp_build_context;
> struct lp_build_mask_context;
> struct gallivm_state;
>
> @@ -207,4 +220,328 @@ lp_build_system_values_array(struct
> gallivm_state *gallivm,
> LLVMValueRef facing);
>
>
> +struct lp_exec_mask {
> + struct lp_build_context *bld;
> +
> + boolean has_mask;
> +
> + LLVMTypeRef int_vec_type;
> +
> + LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
> + int cond_stack_size;
> + LLVMValueRef cond_mask;
> +
> + LLVMBasicBlockRef loop_block;
> + LLVMValueRef cont_mask;
> + LLVMValueRef break_mask;
> + LLVMValueRef break_var;
> + struct {
> + LLVMBasicBlockRef loop_block;
> + LLVMValueRef cont_mask;
> + LLVMValueRef break_mask;
> + LLVMValueRef break_var;
> + } loop_stack[LP_MAX_TGSI_NESTING];
> + int loop_stack_size;
> +
> + LLVMValueRef ret_mask;
> + struct {
> + int pc;
> + LLVMValueRef ret_mask;
> + } call_stack[LP_MAX_TGSI_NESTING];
> + int call_stack_size;
> +
> + LLVMValueRef exec_mask;
> +};
> +
> +struct lp_build_tgsi_inst_list
> +{
> + struct tgsi_full_instruction *instructions;
> + uint max_instructions;
> + uint num_instructions;
> +};
> +
> +unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context *
> bld_base);
> +
> +
> +unsigned lp_bld_tgsi_add_instruction(
> + struct lp_build_tgsi_context * bld_base,
> + struct tgsi_full_instruction *inst_to_add);
> +
> +
> +struct lp_build_tgsi_context;
> +
> +
> +typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct
> lp_build_tgsi_context *,
> + const struct
> tgsi_full_src_register *,
> + unsigned);
> +
> +struct lp_build_tgsi_context
> +{
> + struct lp_build_context base;
> +
> + /** This array stores functions that are used to transform TGSI
> opcodes to
> + * LLVM instructions.
> + */
> + struct lp_build_opcode_action op_actions[TGSI_OPCODE_LAST];
> +
> + /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ),
> rsq_action
> + * should compute 1 / sqrt (src0.x) */
> + struct lp_build_opcode_action rsq_action;
> +
> + const struct tgsi_shader_info *info;
> +
> + lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
> +
> + LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *,
> + LLVMValueRef, unsigned, unsigned, unsigned,
> unsigned);
> +
> + void (*emit_store)(struct lp_build_tgsi_context *,
> + const struct tgsi_full_instruction *,
> + const struct tgsi_opcode_info *,
> + LLVMValueRef dst[4]);
> +
> + void (*emit_declaration)(struct lp_build_tgsi_context *,
> + const struct tgsi_full_declaration
> *decl);
> +
> + void (*emit_immediate)(struct lp_build_tgsi_context *,
> + const struct tgsi_full_immediate *imm);
> +
> +
> + /* Allow the user to store data in this structure rather than
> passing it
> + * to every function. */
> + void * userdata;
> +
> + boolean soa;
> +
> + int pc;
> +
> + struct tgsi_full_instruction *instructions;
> + uint max_instructions;
> + uint num_instructions;
> +
> + /** This function allows the user to insert some instructions at
> the
> + * beginning of the program. It is optional and does not need
> to be
> + * implemented.
> + */
> + void (*emit_prologue)(struct lp_build_tgsi_context*);
> +
> + /** This function allows the user to insert some instructions at
> the end of
> + * the program. This callback is intended to be used for
> emitting
> + * instructions to handle the export for the output registers,
> but it can
> + * be used for any purpose. Implementing this function is
> optional, but
> + * recommended.
> + */
> + void (*emit_epilogue)(struct lp_build_tgsi_context*);
> +};
> +
> +struct lp_build_tgsi_soa_context
> +{
> + struct lp_build_tgsi_context bld_base;
> +
> + /* Builder for vector integer masks and indices */
> + struct lp_build_context uint_bld;
> +
> + /* Builder for scalar elements of shader's data type (float) */
> + struct lp_build_context elem_bld;
> +
> + LLVMValueRef consts_ptr;
> + const LLVMValueRef *pos;
> + const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
> + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
> +
> + const struct lp_build_sampler_soa *sampler;
> +
> + LLVMValueRef
> immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
> + LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS];
> + LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
> + LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
> +
> + /* We allocate/use this array of temps if (1 <<
> TGSI_FILE_TEMPORARY) is
> + * set in the indirect_files field.
> + * The temps[] array above is unused then.
> + */
> + LLVMValueRef temps_array;
> +
> + /* We allocate/use this array of output if (1 <<
> TGSI_FILE_OUTPUT) is
> + * set in the indirect_files field.
> + * The outputs[] array above is unused then.
> + */
> + LLVMValueRef outputs_array;
> +
> + /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT)
> is
> + * set in the indirect_files field.
> + * The inputs[] array above is unused then.
> + */
> + LLVMValueRef inputs_array;
> +
> + LLVMValueRef system_values_array;
> +
> + /** bitmask indicating which register files are accessed
> indirectly */
> + unsigned indirect_files;
> +
> + struct lp_build_mask_context *mask;
> + struct lp_exec_mask exec_mask;
> +
> + uint num_immediates;
> +
> +};
> +
> +void
> +lp_emit_declaration_soa(
> + struct lp_build_tgsi_context *bld,
> + const struct tgsi_full_declaration *decl);
> +
> +void lp_emit_immediate_soa(
> + struct lp_build_tgsi_context *bld_base,
> + const struct tgsi_full_immediate *imm);
> +
> +boolean
> +lp_emit_instruction_soa(
> + struct lp_build_tgsi_soa_context *bld,
> + const struct tgsi_full_instruction *inst,
> + const struct tgsi_opcode_info *info);
> +
> +
> +LLVMValueRef
> +lp_get_temp_ptr_soa(
> + struct lp_build_tgsi_soa_context *bld,
> + unsigned index,
> + unsigned chan);
> +
> +LLVMValueRef
> +lp_get_output_ptr(
> + struct lp_build_tgsi_soa_context *bld,
> + unsigned index,
> + unsigned chan);
> +
> +struct lp_build_tgsi_aos_context
> +{
> + struct lp_build_tgsi_context bld_base;
> +
> + /* Builder for integer masks and indices */
> + struct lp_build_context int_bld;
> +
> + /*
> + * AoS swizzle used:
> + * - swizzles[0] = red index
> + * - swizzles[1] = green index
> + * - swizzles[2] = blue index
> + * - swizzles[3] = alpha index
> + */
> + unsigned char swizzles[4];
> + unsigned char inv_swizzles[4];
> +
> + LLVMValueRef consts_ptr;
> + const LLVMValueRef *inputs;
> + LLVMValueRef *outputs;
> +
> + struct lp_build_sampler_aos *sampler;
> +
> + LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
> + LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
> + LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
> + LLVMValueRef preds[LP_MAX_TGSI_PREDS];
> +
> + /* We allocate/use this array of temps if (1 <<
> TGSI_FILE_TEMPORARY) is
> + * set in the indirect_files field.
> + * The temps[] array above is unused then.
> + */
> + LLVMValueRef temps_array;
> +
> + /** bitmask indicating which register files are accessed
> indirectly */
> + unsigned indirect_files;
> +
> +};
> +
> +static INLINE struct lp_build_tgsi_soa_context *
> +lp_soa_context(struct lp_build_tgsi_context *bld_base)
> +{
> + return (struct lp_build_tgsi_soa_context *)bld_base;
> +}
> +
> +static INLINE struct lp_build_tgsi_aos_context *
> +lp_aos_context(struct lp_build_tgsi_context *bld_base)
> +{
> + return (struct lp_build_tgsi_aos_context *)bld_base;
> +}
> +
> +void
> +lp_emit_declaration_aos(
> + struct lp_build_tgsi_aos_context *bld,
> + const struct tgsi_full_declaration *decl);
> +
> +
> +boolean
> +lp_emit_instruction_aos(
> + struct lp_build_tgsi_aos_context *bld,
> + const struct tgsi_full_instruction *inst,
> + const struct tgsi_opcode_info *info,
> + int *pc);
> +
> +void
> +lp_emit_store_aos(
> + struct lp_build_tgsi_aos_context *bld,
> + const struct tgsi_full_instruction *inst,
> + unsigned index,
> + LLVMValueRef value);
> +
> +void lp_build_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data);
> +
> +LLVMValueRef
> +lp_build_tgsi_inst_llvm_aos(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_instruction *inst);
> +
> +void
> +lp_build_tgsi_intrinsic(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data);
> +
> +LLVMValueRef
> +lp_build_emit_llvm(
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode,
> + struct lp_build_emit_data * emit_data);
> +
> +LLVMValueRef
> +lp_build_emit_llvm_unary(
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode,
> + LLVMValueRef arg0);
> +
> +LLVMValueRef
> +lp_build_emit_llvm_binary(
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode,
> + LLVMValueRef arg0,
> + LLVMValueRef arg1);
> +
> +LLVMValueRef
> +lp_build_emit_llvm_ternary(
> + struct lp_build_tgsi_context *bld_base,
> + unsigned tgsi_opcode,
> + LLVMValueRef arg0,
> + LLVMValueRef arg1,
> + LLVMValueRef arg2);
> +
> +boolean
> +lp_build_tgsi_inst_llvm(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_instruction *inst);
> +
> +LLVMValueRef
> +lp_build_emit_fetch(
> + struct lp_build_tgsi_context *bld_base,
> + const struct tgsi_full_instruction *inst,
> + unsigned src_op,
> + const unsigned chan_index);
> +
> +boolean
> +lp_build_tgsi_llvm(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_token *tokens);
> +
> #endif /* LP_BLD_TGSI_H */
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> index a021efd..26cfffe 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> @@ -55,61 +55,15 @@
> #include "lp_bld_flow.h"
> #include "lp_bld_quad.h"
> #include "lp_bld_tgsi.h"
> -#include "lp_bld_limits.h"
> #include "lp_bld_debug.h"
>
>
> -#define LP_MAX_INSTRUCTIONS 256
> -
> -
> -struct lp_build_tgsi_aos_context
> -{
> - struct lp_build_context base;
> -
> - /* Builder for integer masks and indices */
> - struct lp_build_context int_bld;
> -
> - /*
> - * AoS swizzle used:
> - * - swizzles[0] = red index
> - * - swizzles[1] = green index
> - * - swizzles[2] = blue index
> - * - swizzles[3] = alpha index
> - */
> - unsigned char swizzles[4];
> - unsigned char inv_swizzles[4];
> -
> - LLVMValueRef consts_ptr;
> - const LLVMValueRef *inputs;
> - LLVMValueRef *outputs;
> -
> - struct lp_build_sampler_aos *sampler;
> -
> - LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
> - LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
> - LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
> - LLVMValueRef preds[LP_MAX_TGSI_PREDS];
> -
> - /* We allocate/use this array of temps if (1 <<
> TGSI_FILE_TEMPORARY) is
> - * set in the indirect_files field.
> - * The temps[] array above is unused then.
> - */
> - LLVMValueRef temps_array;
> -
> - /** bitmask indicating which register files are accessed
> indirectly */
> - unsigned indirect_files;
> -
> - struct tgsi_full_instruction *instructions;
> - uint max_instructions;
> -};
> -
> -
> /**
> * Wrapper around lp_build_swizzle_aos which translates swizzles to
> another
> * ordering.
> */
> static LLVMValueRef
> -swizzle_aos(struct lp_build_tgsi_aos_context *bld,
> +swizzle_aos(struct lp_build_tgsi_context *bld_base,
> LLVMValueRef a,
> unsigned swizzle_x,
> unsigned swizzle_y,
> @@ -117,6 +71,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld,
> unsigned swizzle_w)
> {
> unsigned char swizzles[4];
> + struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
>
> assert(swizzle_x < 4);
> assert(swizzle_y < 4);
> @@ -128,7 +83,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld,
> swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
> swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
>
> - return lp_build_swizzle_aos(&bld->base, a, swizzles);
> + return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
> }
>
>
> @@ -138,149 +93,132 @@ swizzle_scalar_aos(struct
> lp_build_tgsi_aos_context *bld,
> unsigned chan)
> {
> chan = bld->swizzles[chan];
> - return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
> + return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
> }
>
>
> -/**
> - * Register fetch.
> - */
> static LLVMValueRef
> -emit_fetch(
> - struct lp_build_tgsi_aos_context *bld,
> - const struct tgsi_full_instruction *inst,
> - unsigned src_op)
> +emit_fetch_constant(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> - struct lp_type type = bld->base.type;
> - const struct tgsi_full_src_register *reg = &inst->Src[src_op];
> + struct lp_build_tgsi_aos_context * bld =
> lp_aos_context(bld_base);
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + struct lp_type type = bld_base->base.type;
> LLVMValueRef res;
> unsigned chan;
>
> assert(!reg->Register.Indirect);
>
> /*
> - * Fetch the from the register file.
> + * Get the constants components
> */
>
> - switch (reg->Register.File) {
> - case TGSI_FILE_CONSTANT:
> - /*
> - * Get the constants components
> - */
> -
> - res = bld->base.undef;
> - for (chan = 0; chan < 4; ++chan) {
> - LLVMValueRef index;
> - LLVMValueRef scalar_ptr;
> - LLVMValueRef scalar;
> - LLVMValueRef swizzle;
> -
> - index = lp_build_const_int32(bld->base.gallivm,
> reg->Register.Index * 4 + chan);
> -
> - scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
> - &index, 1, "");
> + res = bld->bld_base.base.undef;
> + for (chan = 0; chan < 4; ++chan) {
> + LLVMValueRef index;
> + LLVMValueRef scalar_ptr;
> + LLVMValueRef scalar;
> + LLVMValueRef swizzle;
>
> - scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> + index = lp_build_const_int32(bld->bld_base.base.gallivm,
> + reg->Register.Index * 4 + chan);
>
> - lp_build_name(scalar, "const[%u].%c", reg->Register.Index,
> "xyzw"[chan]);
> + scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1,
> "");
>
> - /*
> - * NOTE: constants array is always assumed to be RGBA
> - */
> + scalar = LLVMBuildLoad(builder, scalar_ptr, "");
>
> - swizzle = lp_build_const_int32(bld->base.gallivm, chan);
> -
> - res = LLVMBuildInsertElement(builder, res, scalar, swizzle,
> "");
> - }
> + lp_build_name(scalar, "const[%u].%c", reg->Register.Index,
> "xyzw"[chan]);
>
> /*
> - * Broadcast the first quaternion to all others.
> - *
> - * XXX: could be factored into a reusable function.
> + * NOTE: constants array is always assumed to be RGBA
> */
>
> - if (type.length > 4) {
> - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
> - unsigned i;
> + swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
> chan);
>
> - for (chan = 0; chan < 4; ++chan) {
> - shuffles[chan] = lp_build_const_int32(bld->base.gallivm,
> chan);
> - }
> -
> - for (i = 4; i < type.length; ++i) {
> - shuffles[i] = shuffles[i % 4];
> - }
> + res = LLVMBuildInsertElement(builder, res, scalar, swizzle,
> "");
> + }
>
> - res = LLVMBuildShuffleVector(builder,
> - res, bld->base.undef,
> - LLVMConstVector(shuffles,
> type.length),
> - "");
> - }
> - break;
> + /*
> + * Broadcast the first quaternion to all others.
> + *
> + * XXX: could be factored into a reusable function.
> + */
>
> - case TGSI_FILE_IMMEDIATE:
> - res = bld->immediates[reg->Register.Index];
> - assert(res);
> - break;
> + if (type.length > 4) {
> + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
> + unsigned i;
>
> - case TGSI_FILE_INPUT:
> - res = bld->inputs[reg->Register.Index];
> - assert(res);
> - break;
> -
> - case TGSI_FILE_TEMPORARY:
> - {
> - LLVMValueRef temp_ptr;
> - temp_ptr = bld->temps[reg->Register.Index];
> - res = LLVMBuildLoad(builder, temp_ptr, "");
> - if (!res)
> - return bld->base.undef;
> + for (chan = 0; chan < 4; ++chan) {
> + shuffles[chan] =
> lp_build_const_int32(bld->bld_base.base.gallivm, chan);
> }
> - break;
>
> - default:
> - assert(0 && "invalid src register in emit_fetch()");
> - return bld->base.undef;
> - }
> -
> - /*
> - * Apply sign modifier.
> - */
> + for (i = 4; i < type.length; ++i) {
> + shuffles[i] = shuffles[i % 4];
> + }
>
> - if (reg->Register.Absolute) {
> - res = lp_build_abs(&bld->base, res);
> + res = LLVMBuildShuffleVector(builder,
> + res, bld->bld_base.base.undef,
> + LLVMConstVector(shuffles,
> type.length),
> + "");
> }
> + return res;
> +}
>
> - if(reg->Register.Negate) {
> - res = lp_build_negate(&bld->base, res);
> - }
> +static LLVMValueRef
> +emit_fetch_immediate(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> +{
> + struct lp_build_tgsi_aos_context * bld =
> lp_aos_context(bld_base);
> + LLVMValueRef res = bld->immediates[reg->Register.Index];
> + assert(res);
> + return res;
> +}
>
> - /*
> - * Swizzle the argument
> - */
> +static LLVMValueRef
> +emit_fetch_input(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> +{
> + struct lp_build_tgsi_aos_context * bld =
> lp_aos_context(bld_base);
> + LLVMValueRef res = bld->inputs[reg->Register.Index];
> + assert(!reg->Register.Indirect);
> + assert(res);
> + return res;
> +}
>
> - res = swizzle_aos(bld, res,
> - reg->Register.SwizzleX,
> - reg->Register.SwizzleY,
> - reg->Register.SwizzleZ,
> - reg->Register.SwizzleW);
> +static LLVMValueRef
> +emit_fetch_temporary(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> +{
> + struct lp_build_tgsi_aos_context * bld =
> lp_aos_context(bld_base);
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
> + LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
> + assert(!reg->Register.Indirect);
> + if (!res)
> + return bld->bld_base.base.undef;
>
> return res;
> }
>
> -
> /**
> * Register store.
> */
> -static void
> -emit_store(
> +void
> +lp_emit_store_aos(
> struct lp_build_tgsi_aos_context *bld,
> const struct tgsi_full_instruction *inst,
> unsigned index,
> LLVMValueRef value)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> const struct tgsi_full_dst_register *reg = &inst->Dst[index];
> LLVMValueRef mask = NULL;
> LLVMValueRef ptr;
> @@ -294,13 +232,13 @@ emit_store(
> break;
>
> case TGSI_SAT_ZERO_ONE:
> - value = lp_build_max(&bld->base, value, bld->base.zero);
> - value = lp_build_min(&bld->base, value, bld->base.one);
> + value = lp_build_max(&bld->bld_base.base, value,
> bld->bld_base.base.zero);
> + value = lp_build_min(&bld->bld_base.base, value,
> bld->bld_base.base.one);
> break;
>
> case TGSI_SAT_MINUS_PLUS_ONE:
> - value = lp_build_max(&bld->base, value,
> lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
> - value = lp_build_min(&bld->base, value, bld->base.one);
> + value = lp_build_max(&bld->bld_base.base, value,
> lp_build_const_vec(bld->bld_base.base.gallivm,
> bld->bld_base.base.type, -1.0));
> + value = lp_build_min(&bld->bld_base.base, value,
> bld->bld_base.base.one);
> break;
>
> default:
> @@ -335,6 +273,8 @@ emit_store(
> return;
> }
>
> + if (!ptr)
> + return;
> /*
> * Predicate
> */
> @@ -350,17 +290,17 @@ emit_store(
> /*
> * Convert the value to an integer mask.
> */
> - pred = lp_build_compare(bld->base.gallivm,
> - bld->base.type,
> + pred = lp_build_compare(bld->bld_base.base.gallivm,
> + bld->bld_base.base.type,
> PIPE_FUNC_NOTEQUAL,
> pred,
> - bld->base.zero);
> + bld->bld_base.base.zero);
>
> if (inst->Predicate.Negate) {
> pred = LLVMBuildNot(builder, pred, "");
> }
>
> - pred = swizzle_aos(bld, pred,
> + pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
> inst->Predicate.SwizzleX,
> inst->Predicate.SwizzleY,
> inst->Predicate.SwizzleZ,
> @@ -380,7 +320,7 @@ emit_store(
> if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
> LLVMValueRef writemask;
>
> - writemask = lp_build_const_mask_aos(bld->base.gallivm,
> bld->base.type,
> + writemask =
> lp_build_const_mask_aos(bld->bld_base.base.gallivm,
> bld->bld_base.base.type,
> reg->Register.WriteMask);
>
> if (mask) {
> @@ -394,7 +334,7 @@ emit_store(
> LLVMValueRef orig_value;
>
> orig_value = LLVMBuildLoad(builder, ptr, "");
> - value = lp_build_select(&bld->base,
> + value = lp_build_select(&bld->bld_base.base,
> mask, value, orig_value);
> }
>
> @@ -419,44 +359,44 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
>
> if (!bld->sampler) {
> _debug_printf("warning: found texture instruction but no
> sampler generator supplied\n");
> - return bld->base.undef;
> + return bld->bld_base.base.undef;
> }
>
> target = inst->Texture.Texture;
>
> - coords = emit_fetch( bld, inst, 0 );
> + coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 ,
> LP_CHAN_ALL);
>
> if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
> - ddx = emit_fetch( bld, inst, 1 );
> - ddy = emit_fetch( bld, inst, 2 );
> + ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 ,
> LP_CHAN_ALL);
> + ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 ,
> LP_CHAN_ALL);
> unit = inst->Src[3].Register.Index;
> } else {
> #if 0
> - ddx = lp_build_ddx( &bld->base, coords );
> - ddy = lp_build_ddy( &bld->base, coords );
> + ddx = lp_build_ddx( &bld->bld_base.base, coords );
> + ddy = lp_build_ddy( &bld->bld_base.base, coords );
> #else
> /* TODO */
> - ddx = bld->base.one;
> - ddy = bld->base.one;
> + ddx = bld->bld_base.base.one;
> + ddy = bld->bld_base.base.one;
> #endif
> unit = inst->Src[1].Register.Index;
> }
>
> return bld->sampler->emit_fetch_texel(bld->sampler,
> - &bld->base,
> + &bld->bld_base.base,
> target, unit,
> coords, ddx, ddy,
> modifier);
> }
>
>
> -static void
> -emit_declaration(
> +void
> +lp_emit_declaration_aos(
> struct lp_build_tgsi_aos_context *bld,
> const struct tgsi_full_declaration *decl)
> {
> - struct gallivm_state *gallivm = bld->base.gallivm;
> - LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm,
> bld->base.type);
> + struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> + LLVMTypeRef vec_type =
> lp_build_vec_type(bld->bld_base.base.gallivm,
> bld->bld_base.base.type);
>
> unsigned first = decl->Range.First;
> unsigned last = decl->Range.Last;
> @@ -468,7 +408,7 @@ emit_declaration(
> assert(idx < LP_MAX_TGSI_TEMPS);
> if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> LLVMValueRef array_size = lp_build_const_int32(gallivm,
> last + 1);
> - bld->temps_array =
> lp_build_array_alloca(bld->base.gallivm,
> + bld->temps_array =
> lp_build_array_alloca(bld->bld_base.base.gallivm,
> vec_type,
> array_size,
> "");
> } else {
> bld->temps[idx] = lp_build_alloca(gallivm, vec_type,
> "");
> @@ -501,8 +441,8 @@ emit_declaration(
> * Emit LLVM for one TGSI instruction.
> * \param return TRUE for success, FALSE otherwise
> */
> -static boolean
> -emit_instruction(
> +boolean
> +lp_emit_instruction_aos(
> struct lp_build_tgsi_aos_context *bld,
> const struct tgsi_full_instruction *inst,
> const struct tgsi_opcode_info *info,
> @@ -527,17 +467,17 @@ emit_instruction(
>
> assert(info->num_dst <= 1);
> if (info->num_dst) {
> - dst0 = bld->base.undef;
> + dst0 = bld->bld_base.base.undef;
> }
>
> switch (inst->Instruction.Opcode) {
> case TGSI_OPCODE_ARL:
> - src0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_floor(&bld->base, src0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + dst0 = lp_build_floor(&bld->bld_base.base, src0);
> break;
>
> case TGSI_OPCODE_MOV:
> - dst0 = emit_fetch(bld, inst, 0);
> + dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> break;
>
> case TGSI_OPCODE_LIT:
> @@ -545,15 +485,15 @@ emit_instruction(
>
> case TGSI_OPCODE_RCP:
> /* TGSI_OPCODE_RECIP */
> - src0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_rcp(&bld->base, src0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + dst0 = lp_build_rcp(&bld->bld_base.base, src0);
> break;
>
> case TGSI_OPCODE_RSQ:
> /* TGSI_OPCODE_RECIPSQRT */
> - src0 = emit_fetch(bld, inst, 0);
> - tmp0 = lp_build_abs(&bld->base, src0);
> - dst0 = lp_build_rsqrt(&bld->base, tmp0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + tmp0 = lp_build_emit_llvm_unary(&bld->bld_base,
> TGSI_OPCODE_ABS, src0);
> + dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
> break;
>
> case TGSI_OPCODE_EXP:
> @@ -563,15 +503,15 @@ emit_instruction(
> return FALSE;
>
> case TGSI_OPCODE_MUL:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - dst0 = lp_build_mul(&bld->base, src0, src1);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
> break;
>
> case TGSI_OPCODE_ADD:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - dst0 = lp_build_add(&bld->base, src0, src1);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
> break;
>
> case TGSI_OPCODE_DP3:
> @@ -586,121 +526,116 @@ emit_instruction(
> return FALSE;
>
> case TGSI_OPCODE_MIN:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - dst0 = lp_build_max(&bld->base, src0, src1);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
> break;
>
> case TGSI_OPCODE_MAX:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - dst0 = lp_build_max(&bld->base, src0, src1);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
> break;
>
> case TGSI_OPCODE_SLT:
> /* TGSI_OPCODE_SETLT */
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
> - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0,
> src1);
> + dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
> break;
>
> case TGSI_OPCODE_SGE:
> /* TGSI_OPCODE_SETGE */
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
> - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL,
> src0, src1);
> + dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
> break;
>
> case TGSI_OPCODE_MAD:
> /* TGSI_OPCODE_MADD */
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - src2 = emit_fetch(bld, inst, 2);
> - tmp0 = lp_build_mul(&bld->base, src0, src1);
> - dst0 = lp_build_add(&bld->base, tmp0, src2);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> + tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
> + dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
> break;
>
> case TGSI_OPCODE_SUB:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - dst0 = lp_build_sub(&bld->base, src0, src1);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
> break;
>
> case TGSI_OPCODE_LRP:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - src2 = emit_fetch(bld, inst, 2);
> - tmp0 = lp_build_sub(&bld->base, src1, src2);
> - tmp0 = lp_build_mul(&bld->base, src0, tmp0);
> - dst0 = lp_build_add(&bld->base, tmp0, src2);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> + tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
> + tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
> + dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
> break;
>
> case TGSI_OPCODE_CND:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - src2 = emit_fetch(bld, inst, 2);
> - tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type,
> 0.5);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2,
> tmp1);
> - dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> + tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm,
> bld->bld_base.base.type, 0.5);
> + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER,
> src2, tmp1);
> + dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
> break;
>
> case TGSI_OPCODE_DP2A:
> return FALSE;
>
> case TGSI_OPCODE_FRC:
> - src0 = emit_fetch(bld, inst, 0);
> - tmp0 = lp_build_floor(&bld->base, src0);
> - dst0 = lp_build_sub(&bld->base, src0, tmp0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + tmp0 = lp_build_floor(&bld->bld_base.base, src0);
> + dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
> break;
>
> case TGSI_OPCODE_CLAMP:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - src2 = emit_fetch(bld, inst, 2);
> - tmp0 = lp_build_max(&bld->base, src0, src1);
> - dst0 = lp_build_min(&bld->base, tmp0, src2);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> + tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
> + dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
> break;
>
> case TGSI_OPCODE_FLR:
> - src0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_floor(&bld->base, src0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + dst0 = lp_build_floor(&bld->bld_base.base, src0);
> break;
>
> case TGSI_OPCODE_ROUND:
> - src0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_round(&bld->base, src0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + dst0 = lp_build_round(&bld->bld_base.base, src0);
> break;
>
> case TGSI_OPCODE_EX2:
> - src0 = emit_fetch(bld, inst, 0);
> - tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0,
> TGSI_SWIZZLE_X);
> - dst0 = lp_build_exp2(&bld->base, tmp0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0,
> TGSI_SWIZZLE_X);
> + dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
> break;
>
> case TGSI_OPCODE_LG2:
> - src0 = emit_fetch(bld, inst, 0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
> - dst0 = lp_build_log2(&bld->base, tmp0);
> + dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
> break;
>
> case TGSI_OPCODE_POW:
> - src0 = emit_fetch(bld, inst, 0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
> - src1 = emit_fetch(bld, inst, 1);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
> - dst0 = lp_build_pow(&bld->base, src0, src1);
> + dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
> break;
>
> case TGSI_OPCODE_XPD:
> return FALSE;
>
> - case TGSI_OPCODE_ABS:
> - src0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_abs(&bld->base, src0);
> - break;
> -
> case TGSI_OPCODE_RCC:
> /* deprecated? */
> assert(0);
> @@ -710,9 +645,9 @@ emit_instruction(
> return FALSE;
>
> case TGSI_OPCODE_COS:
> - src0 = emit_fetch(bld, inst, 0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
> - dst0 = lp_build_cos(&bld->base, tmp0);
> + dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
> break;
>
> case TGSI_OPCODE_DDX:
> @@ -748,45 +683,45 @@ emit_instruction(
> return FALSE;
>
> case TGSI_OPCODE_SEQ:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
> - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL,
> src0, src1);
> + dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
> break;
>
> case TGSI_OPCODE_SFL:
> - dst0 = bld->base.zero;
> + dst0 = bld->bld_base.base.zero;
> break;
>
> case TGSI_OPCODE_SGT:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0,
> src1);
> - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER,
> src0, src1);
> + dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
> break;
>
> case TGSI_OPCODE_SIN:
> - src0 = emit_fetch(bld, inst, 0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
> - dst0 = lp_build_sin(&bld->base, tmp0);
> + dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
> break;
>
> case TGSI_OPCODE_SLE:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
> - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL,
> src0, src1);
> + dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
> break;
>
> case TGSI_OPCODE_SNE:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0,
> src1);
> - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL,
> src0, src1);
> + dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
> break;
>
> case TGSI_OPCODE_STR:
> - dst0 = bld->base.one;
> + dst0 = bld->bld_base.base.one;
> break;
>
> case TGSI_OPCODE_TEX:
> @@ -834,8 +769,8 @@ emit_instruction(
> break;
>
> case TGSI_OPCODE_ARR:
> - src0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_round(&bld->base, src0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + dst0 = lp_build_round(&bld->bld_base.base, src0);
> break;
>
> case TGSI_OPCODE_BRA:
> @@ -856,16 +791,16 @@ emit_instruction(
>
> case TGSI_OPCODE_SSG:
> /* TGSI_OPCODE_SGN */
> - tmp0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_sgn(&bld->base, tmp0);
> + tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
> break;
>
> case TGSI_OPCODE_CMP:
> - src0 = emit_fetch(bld, inst, 0);
> - src1 = emit_fetch(bld, inst, 1);
> - src2 = emit_fetch(bld, inst, 2);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0,
> bld->base.zero);
> - dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0,
> bld->bld_base.base.zero);
> + dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
> break;
>
> case TGSI_OPCODE_SCS:
> @@ -934,8 +869,8 @@ emit_instruction(
> break;
>
> case TGSI_OPCODE_CEIL:
> - src0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_ceil(&bld->base, src0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + dst0 = lp_build_ceil(&bld->bld_base.base, src0);
> break;
>
> case TGSI_OPCODE_I2F:
> @@ -951,8 +886,8 @@ emit_instruction(
> break;
>
> case TGSI_OPCODE_TRUNC:
> - src0 = emit_fetch(bld, inst, 0);
> - dst0 = lp_build_trunc(&bld->base, src0);
> + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> + dst0 = lp_build_trunc(&bld->bld_base.base, src0);
> break;
>
> case TGSI_OPCODE_SHL:
> @@ -1028,7 +963,7 @@ emit_instruction(
> }
>
> if (info->num_dst) {
> - emit_store(bld, inst, 0, dst0);
> + lp_emit_store_aos(bld, inst, 0, dst0);
> }
>
> return TRUE;
> @@ -1049,13 +984,12 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
> struct lp_build_tgsi_aos_context bld;
> struct tgsi_parse_context parse;
> uint num_immediates = 0;
> - uint num_instructions = 0;
> unsigned chan;
> int pc = 0;
>
> /* Setup build context */
> memset(&bld, 0, sizeof bld);
> - lp_build_context_init(&bld.base, gallivm, type);
> + lp_build_context_init(&bld.bld_base.base, gallivm, type);
> lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
>
> for (chan = 0; chan < 4; ++chan) {
> @@ -1068,11 +1002,17 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
> bld.consts_ptr = consts_ptr;
> bld.sampler = sampler;
> bld.indirect_files = info->indirect_files;
> - bld.instructions = (struct tgsi_full_instruction *)
> - MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct
> tgsi_full_instruction));
> - bld.max_instructions = LP_MAX_INSTRUCTIONS;
> + bld.bld_base.emit_swizzle = swizzle_aos;
> +
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] =
> emit_fetch_constant;
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] =
> emit_fetch_immediate;
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] =
> emit_fetch_input;
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] =
> emit_fetch_temporary;
>
> - if (!bld.instructions) {
> + /* Set opcode actions */
> + lp_set_default_actions_cpu(&bld.bld_base);
> +
> + if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
> return;
> }
>
> @@ -1084,33 +1024,13 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
> switch(parse.FullToken.Token.Type) {
> case TGSI_TOKEN_TYPE_DECLARATION:
> /* Inputs already interpolated */
> - emit_declaration(&bld, &parse.FullToken.FullDeclaration);
> + lp_emit_declaration_aos(&bld,
> &parse.FullToken.FullDeclaration);
> break;
>
> case TGSI_TOKEN_TYPE_INSTRUCTION:
> - {
> - /* save expanded instruction */
> - if (num_instructions == bld.max_instructions) {
> - struct tgsi_full_instruction *instructions;
> - instructions = REALLOC(bld.instructions,
> - bld.max_instructions
> - * sizeof(struct
> tgsi_full_instruction),
> - (bld.max_instructions +
> LP_MAX_INSTRUCTIONS)
> - * sizeof(struct
> tgsi_full_instruction));
> - if (!instructions) {
> - break;
> - }
> - bld.instructions = instructions;
> - bld.max_instructions += LP_MAX_INSTRUCTIONS;
> - }
> -
> - memcpy(bld.instructions + num_instructions,
> - &parse.FullToken.FullInstruction,
> - sizeof(bld.instructions[0]));
> -
> - num_instructions++;
> - }
> -
> + /* save expanded instruction */
> + lp_bld_tgsi_add_instruction(&bld.bld_base,
> +
> &parse.FullToken.FullInstruction);
> break;
>
> case TGSI_TOKEN_TYPE_IMMEDIATE:
> @@ -1144,10 +1064,10 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
> }
>
> while (pc != -1) {
> - struct tgsi_full_instruction *instr = bld.instructions + pc;
> + struct tgsi_full_instruction *instr =
> bld.bld_base.instructions + pc;
> const struct tgsi_opcode_info *opcode_info =
> tgsi_get_opcode_info(instr->Instruction.Opcode);
> - if (!emit_instruction(&bld, instr, opcode_info, &pc))
> + if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
> _debug_printf("warning: failed to translate tgsi opcode %s
> to LLVM\n",
> opcode_info->mnemonic);
> }
> @@ -1168,6 +1088,5 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
> LLVMDumpModule(module);
> }
>
> - FREE(bld.instructions);
> }
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 1ad0b74..f7e15db 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -47,6 +47,7 @@
> #include "tgsi/tgsi_parse.h"
> #include "tgsi/tgsi_util.h"
> #include "tgsi/tgsi_scan.h"
> +#include "lp_bld_action.h"
> #include "lp_bld_type.h"
> #include "lp_bld_const.h"
> #include "lp_bld_arit.h"
> @@ -63,97 +64,6 @@
> #include "lp_bld_printf.h"
>
>
> -#define NUM_CHANNELS 4
> -
> -#define LP_MAX_INSTRUCTIONS 256
> -
> -
> -struct lp_exec_mask {
> - struct lp_build_context *bld;
> -
> - boolean has_mask;
> -
> - LLVMTypeRef int_vec_type;
> -
> - LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
> - int cond_stack_size;
> - LLVMValueRef cond_mask;
> -
> - LLVMBasicBlockRef loop_block;
> - LLVMValueRef cont_mask;
> - LLVMValueRef break_mask;
> - LLVMValueRef break_var;
> - struct {
> - LLVMBasicBlockRef loop_block;
> - LLVMValueRef cont_mask;
> - LLVMValueRef break_mask;
> - LLVMValueRef break_var;
> - } loop_stack[LP_MAX_TGSI_NESTING];
> - int loop_stack_size;
> -
> - LLVMValueRef ret_mask;
> - struct {
> - int pc;
> - LLVMValueRef ret_mask;
> - } call_stack[LP_MAX_TGSI_NESTING];
> - int call_stack_size;
> -
> - LLVMValueRef exec_mask;
> -};
> -
> -struct lp_build_tgsi_soa_context
> -{
> - struct lp_build_context base;
> -
> - /* Builder for vector integer masks and indices */
> - struct lp_build_context uint_bld;
> -
> - /* Builder for scalar elements of shader's data type (float) */
> - struct lp_build_context elem_bld;
> -
> - LLVMValueRef consts_ptr;
> - const LLVMValueRef *pos;
> - const LLVMValueRef (*inputs)[NUM_CHANNELS];
> - LLVMValueRef (*outputs)[NUM_CHANNELS];
> -
> - const struct lp_build_sampler_soa *sampler;
> -
> - LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
> - LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
> - LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
> - LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
> -
> - /* We allocate/use this array of temps if (1 <<
> TGSI_FILE_TEMPORARY) is
> - * set in the indirect_files field.
> - * The temps[] array above is unused then.
> - */
> - LLVMValueRef temps_array;
> -
> - /* We allocate/use this array of output if (1 <<
> TGSI_FILE_OUTPUT) is
> - * set in the indirect_files field.
> - * The outputs[] array above is unused then.
> - */
> - LLVMValueRef outputs_array;
> -
> - /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT)
> is
> - * set in the indirect_files field.
> - * The inputs[] array above is unused then.
> - */
> - LLVMValueRef inputs_array;
> -
> - LLVMValueRef system_values_array;
> -
> - const struct tgsi_shader_info *info;
> - /** bitmask indicating which register files are accessed
> indirectly */
> - unsigned indirect_files;
> -
> - struct lp_build_mask_context *mask;
> - struct lp_exec_mask exec_mask;
> -
> - struct tgsi_full_instruction *instructions;
> - uint max_instructions;
> -};
> -
> static void lp_exec_mask_init(struct lp_exec_mask *mask, struct
> lp_build_context *bld)
> {
> mask->bld = bld;
> @@ -438,15 +348,15 @@ static void lp_exec_mask_endsub(struct
> lp_exec_mask *mask, int *pc)
> * \param index which temporary register
> * \param chan which channel of the temp register.
> */
> -static LLVMValueRef
> -get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
> +LLVMValueRef
> +lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
> unsigned index,
> unsigned chan)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> assert(chan < 4);
> if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> - LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
> index * 4 + chan);
> + LLVMValueRef lindex =
> lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
> return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1,
> "");
> }
> else {
> @@ -460,15 +370,15 @@ get_temp_ptr(struct lp_build_tgsi_soa_context
> *bld,
> * \param index which output register
> * \param chan which channel of the output register.
> */
> -static LLVMValueRef
> -get_output_ptr(struct lp_build_tgsi_soa_context *bld,
> +LLVMValueRef
> +lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
> unsigned index,
> unsigned chan)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> assert(chan < 4);
> if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> - LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
> + LLVMValueRef lindex =
> lp_build_const_int32(bld->bld_base.base.gallivm,
> index * 4 + chan);
> return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1,
> "");
> }
> @@ -487,15 +397,15 @@ build_gather(struct lp_build_tgsi_soa_context
> *bld,
> LLVMValueRef base_ptr,
> LLVMValueRef indexes)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> - LLVMValueRef res = bld->base.undef;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> + LLVMValueRef res = bld->bld_base.base.undef;
> unsigned i;
>
> /*
> * Loop over elements of index_vec, load scalar value, insert it
> into 'res'.
> */
> - for (i = 0; i < bld->base.type.length; i++) {
> - LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
> + for (i = 0; i < bld->bld_base.base.type.length; i++) {
> + LLVMValueRef ii =
> lp_build_const_int32(bld->bld_base.base.gallivm, i);
> LLVMValueRef index = LLVMBuildExtractElement(builder,
> indexes, ii, "");
> LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
> @@ -520,7 +430,7 @@ emit_mask_scatter(struct
> lp_build_tgsi_soa_context *bld,
> struct lp_exec_mask *mask,
> LLVMValueRef pred)
> {
> - struct gallivm_state *gallivm = bld->base.gallivm;
> + struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> unsigned i;
>
> @@ -537,7 +447,7 @@ emit_mask_scatter(struct
> lp_build_tgsi_soa_context *bld,
> /*
> * Loop over elements of index_vec, store scalar value.
> */
> - for (i = 0; i < bld->base.type.length; i++) {
> + for (i = 0; i < bld->bld_base.base.type.length; i++) {
> LLVMValueRef ii = lp_build_const_int32(gallivm, i);
> LLVMValueRef index = LLVMBuildExtractElement(builder, indexes,
> ii, "");
> LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
> &index, 1, "scatter_ptr");
> @@ -573,7 +483,7 @@ get_indirect_index(struct
> lp_build_tgsi_soa_context *bld,
> unsigned reg_file, unsigned reg_index,
> const struct tgsi_src_register *indirect_reg)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> struct lp_build_context *uint_bld = &bld->uint_bld;
> /* always use X component of address register */
> unsigned swizzle = indirect_reg->SwizzleX;
> @@ -584,7 +494,7 @@ get_indirect_index(struct
> lp_build_tgsi_soa_context *bld,
>
> assert(bld->indirect_files & (1 << reg_file));
>
> - base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
> reg_index);
> + base = lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type, reg_index);
>
> assert(swizzle < 4);
> rel = LLVMBuildLoad(builder,
> @@ -598,9 +508,9 @@ get_indirect_index(struct
> lp_build_tgsi_soa_context *bld,
>
> index = lp_build_add(uint_bld, base, rel);
>
> - max_index = lp_build_const_int_vec(bld->base.gallivm,
> + max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type,
> -
> bld->info->file_max[reg_file]);
> +
> bld->bld_base.info->file_max[reg_file]);
>
> assert(!uint_bld->type.sign);
> index = lp_build_min(uint_bld, index, max_index);
> @@ -608,176 +518,198 @@ get_indirect_index(struct
> lp_build_tgsi_soa_context *bld,
> return index;
> }
>
> -
> -/**
> - * Register fetch.
> - */
> static LLVMValueRef
> -emit_fetch(
> - struct lp_build_tgsi_soa_context *bld,
> - const struct tgsi_full_instruction *inst,
> - unsigned src_op,
> - const unsigned chan_index )
> +emit_fetch_constant(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> {
> - struct gallivm_state *gallivm = bld->base.gallivm;
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> + struct gallivm_state *gallivm = bld_base->base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> struct lp_build_context *uint_bld = &bld->uint_bld;
> - const struct tgsi_full_src_register *reg = &inst->Src[src_op];
> - const unsigned swizzle =
> - tgsi_util_get_full_src_register_swizzle(reg, chan_index);
> - LLVMValueRef res;
> LLVMValueRef indirect_index = NULL;
>
> - if (swizzle > 3) {
> - assert(0 && "invalid swizzle in emit_fetch()");
> - return bld->base.undef;
> - }
> + /* XXX: Handle fetching xyzw components as a vector */
> + assert(swizzle != ~0);
>
> if (reg->Register.Indirect) {
> indirect_index = get_indirect_index(bld,
> reg->Register.File,
> reg->Register.Index,
> &reg->Indirect);
> - } else {
> - assert(reg->Register.Index <=
> bld->info->file_max[reg->Register.File]);
> }
>
> - switch (reg->Register.File) {
> - case TGSI_FILE_CONSTANT:
> - if (reg->Register.Indirect) {
> - LLVMValueRef swizzle_vec =
> - lp_build_const_int_vec(bld->base.gallivm,
> uint_bld->type, swizzle);
> - LLVMValueRef index_vec; /* index into the const buffer */
> + if (reg->Register.Indirect) {
> + LLVMValueRef swizzle_vec =
> + lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type, swizzle);
> + LLVMValueRef index_vec; /* index into the const buffer */
>
> - /* index_vec = indirect_index * 4 + swizzle */
> - index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> - index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> + /* index_vec = indirect_index * 4 + swizzle */
> + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
>
> - /* Gather values from the constant buffer */
> - res = build_gather(bld, bld->consts_ptr, index_vec);
> - }
> - else {
> - LLVMValueRef index; /* index into the const buffer */
> - LLVMValueRef scalar, scalar_ptr;
> + /* Gather values from the constant buffer */
> + return build_gather(bld, bld->consts_ptr, index_vec);
> + }
> + else {
> + LLVMValueRef index; /* index into the const buffer */
> + LLVMValueRef scalar, scalar_ptr;
>
> - index = lp_build_const_int32(gallivm, reg->Register.Index*4
> + swizzle);
> + index = lp_build_const_int32(gallivm, reg->Register.Index*4 +
> swizzle);
>
> - scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
> + scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
> &index, 1, "");
> - scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> + scalar = LLVMBuildLoad(builder, scalar_ptr, "");
>
> - res = lp_build_broadcast_scalar(&bld->base, scalar);
> - }
> - break;
> + return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
> + }
> +}
>
> - case TGSI_FILE_IMMEDIATE:
> - res = bld->immediates[reg->Register.Index][swizzle];
> - assert(res);
> - break;
> +static LLVMValueRef
> +emit_fetch_immediate(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> + LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
> + assert(res);
> + return res;
> +}
>
> - case TGSI_FILE_INPUT:
> - if (reg->Register.Indirect) {
> - LLVMValueRef swizzle_vec =
> - lp_build_const_int_vec(gallivm, uint_bld->type,
> swizzle);
> - LLVMValueRef length_vec =
> - lp_build_const_int_vec(gallivm, uint_bld->type,
> bld->base.type.length);
> - LLVMValueRef index_vec; /* index into the const buffer */
> - LLVMValueRef inputs_array;
> - LLVMTypeRef float4_ptr_type;
> +static LLVMValueRef
> +emit_fetch_input(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> + struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> + LLVMBuilderRef builder = gallivm->builder;
> + struct lp_build_context *uint_bld = &bld->uint_bld;
> + LLVMValueRef indirect_index = NULL;
> + LLVMValueRef res;
>
> - /* index_vec = (indirect_index * 4 + swizzle) * length */
> - index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> - index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> - index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> + if (reg->Register.Indirect) {
> + indirect_index = get_indirect_index(bld,
> + reg->Register.File,
> + reg->Register.Index,
> + &reg->Indirect);
> + }
>
> - /* cast inputs_array pointer to float* */
> - float4_ptr_type =
> LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
> - inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
> + if (reg->Register.Indirect) {
> + LLVMValueRef swizzle_vec =
> + lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
> + LLVMValueRef length_vec =
> + lp_build_const_int_vec(gallivm, uint_bld->type,
> bld->bld_base.base.type.length);
> + LLVMValueRef index_vec; /* index into the const buffer */
> + LLVMValueRef inputs_array;
> + LLVMTypeRef float4_ptr_type;
> +
> + /* index_vec = (indirect_index * 4 + swizzle) * length */
> + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> + index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> +
> + /* cast inputs_array pointer to float* */
> + float4_ptr_type =
> LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
> + inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
> float4_ptr_type, "");
>
> - /* Gather values from the temporary register array */
> - res = build_gather(bld, inputs_array, index_vec);
> - } else {
> - if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
> - LLVMValueRef lindex = lp_build_const_int32(gallivm,
> - reg->Register.Index * 4 +
> swizzle);
> - LLVMValueRef input_ptr = LLVMBuildGEP(builder,
> -
> bld->inputs_array,
> &lindex, 1, "");
> - res = LLVMBuildLoad(builder, input_ptr, "");
> - }
> - else {
> - res = bld->inputs[reg->Register.Index][swizzle];
> - }
> - }
> - assert(res);
> - break;
> -
> - case TGSI_FILE_TEMPORARY:
> - if (reg->Register.Indirect) {
> - LLVMValueRef swizzle_vec =
> - lp_build_const_int_vec(bld->base.gallivm,
> uint_bld->type, swizzle);
> - LLVMValueRef length_vec =
> - lp_build_const_int_vec(bld->base.gallivm,
> uint_bld->type,
> - bld->base.type.length);
> - LLVMValueRef index_vec; /* index into the const buffer */
> - LLVMValueRef temps_array;
> - LLVMTypeRef float4_ptr_type;
> -
> - /* index_vec = (indirect_index * 4 + swizzle) * length */
> - index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> - index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> - index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> -
> - /* cast temps_array pointer to float* */
> - float4_ptr_type =
> LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context),
> 0);
> - temps_array = LLVMBuildBitCast(builder, bld->temps_array,
> - float4_ptr_type, "");
> -
> - /* Gather values from the temporary register array */
> - res = build_gather(bld, temps_array, index_vec);
> + /* Gather values from the temporary register array */
> + res = build_gather(bld, inputs_array, index_vec);
> + } else {
> + if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
> + LLVMValueRef lindex = lp_build_const_int32(gallivm,
> + reg->Register.Index * 4 +
> swizzle);
> + LLVMValueRef input_ptr = LLVMBuildGEP(builder,
> + bld->inputs_array,
> &lindex, 1, "");
> + res = LLVMBuildLoad(builder, input_ptr, "");
> }
> else {
> - LLVMValueRef temp_ptr;
> - temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
> - res = LLVMBuildLoad(builder, temp_ptr, "");
> - if (!res)
> - return bld->base.undef;
> + res = bld->inputs[reg->Register.Index][swizzle];
> }
> - break;
> -
> - case TGSI_FILE_SYSTEM_VALUE:
> - assert(!reg->Register.Indirect);
> - {
> - LLVMValueRef index; /* index into the system value array
> */
> - LLVMValueRef scalar, scalar_ptr;
> -
> - index = lp_build_const_int32(gallivm,
> - reg->Register.Index * 4 +
> swizzle);
> -
> - scalar_ptr = LLVMBuildGEP(builder,
> bld->system_values_array,
> - &index, 1, "");
> - scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> + }
> + assert(res);
> + return res;
> +}
>
> - res = lp_build_broadcast_scalar(&bld->base, scalar);
> - }
> - break;
> +static LLVMValueRef
> +emit_fetch_temporary(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> + struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> + LLVMBuilderRef builder = gallivm->builder;
> + struct lp_build_context *uint_bld = &bld->uint_bld;
> + LLVMValueRef indirect_index = NULL;
> + LLVMValueRef res;
>
> - default:
> - assert(0 && "invalid src register in emit_fetch()");
> - return bld->base.undef;
> + if (reg->Register.Indirect) {
> + indirect_index = get_indirect_index(bld,
> + reg->Register.File,
> + reg->Register.Index,
> + &reg->Indirect);
> }
>
> - if (reg->Register.Absolute) {
> - res = lp_build_abs( &bld->base, res );
> + if (reg->Register.Indirect) {
> + LLVMValueRef swizzle_vec =
> + lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type, swizzle);
> + LLVMValueRef length_vec =
> + lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type,
> + bld->bld_base.base.type.length);
> + LLVMValueRef index_vec; /* index into the const buffer */
> + LLVMValueRef temps_array;
> + LLVMTypeRef float4_ptr_type;
> +
> + /* index_vec = (indirect_index * 4 + swizzle) * length */
> + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> + index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> +
> + /* cast temps_array pointer to float* */
> + float4_ptr_type =
> LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context),
> 0);
> + temps_array = LLVMBuildBitCast(builder, bld->temps_array,
> + float4_ptr_type, "");
> +
> + /* Gather values from the temporary register array */
> + res = build_gather(bld, temps_array, index_vec);
> }
> -
> - if (reg->Register.Negate) {
> - res = lp_build_negate( &bld->base, res );
> + else {
> + LLVMValueRef temp_ptr;
> + temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
> swizzle);
> + res = LLVMBuildLoad(builder, temp_ptr, "");
> + if (!res)
> + return bld->bld_base.base.undef;
> }
>
> return res;
> }
>
> +static LLVMValueRef
> +emit_fetch_system_value(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_src_register * reg,
> + const unsigned swizzle)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> + struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> + LLVMBuilderRef builder = gallivm->builder;
> + LLVMValueRef index; /* index into the system value array */
> + LLVMValueRef scalar, scalar_ptr;
> +
> + assert(!reg->Register.Indirect);
> +
> + index = lp_build_const_int32(gallivm, reg->Register.Index * 4 +
> swizzle);
> +
> + scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array,
> &index, 1, "");
> + scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> +
> + return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
> +}
>
> /**
> * Register fetch with derivatives.
> @@ -785,27 +717,21 @@ emit_fetch(
> static void
> emit_fetch_deriv(
> struct lp_build_tgsi_soa_context *bld,
> - const struct tgsi_full_instruction *inst,
> - unsigned index,
> - const unsigned chan_index,
> + LLVMValueRef src,
> LLVMValueRef *res,
> LLVMValueRef *ddx,
> LLVMValueRef *ddy)
> {
> - LLVMValueRef src;
> -
> - src = emit_fetch(bld, inst, index, chan_index);
> -
> if(res)
> *res = src;
>
> /* TODO: use interpolation coeffs for inputs */
>
> if(ddx)
> - *ddx = lp_build_ddx(&bld->base, src);
> + *ddx = lp_build_ddx(&bld->bld_base.base, src);
>
> if(ddy)
> - *ddy = lp_build_ddy(&bld->base, src);
> + *ddy = lp_build_ddy(&bld->bld_base.base, src);
> }
>
>
> @@ -818,7 +744,7 @@ emit_fetch_predicate(
> const struct tgsi_full_instruction *inst,
> LLVMValueRef *pred)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> unsigned index;
> unsigned char swizzles[4];
> LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
> @@ -858,11 +784,11 @@ emit_fetch_predicate(
> * is needlessly causing two comparisons due to storing the
> intermediate
> * result as float vector instead of an integer mask
> vector.
> */
> - value = lp_build_compare(bld->base.gallivm,
> - bld->base.type,
> + value = lp_build_compare(bld->bld_base.base.gallivm,
> + bld->bld_base.base.type,
> PIPE_FUNC_NOTEQUAL,
> value,
> - bld->base.zero);
> + bld->bld_base.base.zero);
> if (inst->Predicate.Negate) {
> value = LLVMBuildNot(builder, value, "");
> }
> @@ -881,15 +807,16 @@ emit_fetch_predicate(
> * Register store.
> */
> static void
> -emit_store(
> - struct lp_build_tgsi_soa_context *bld,
> +emit_store_chan(
> + struct lp_build_tgsi_context *bld_base,
> const struct tgsi_full_instruction *inst,
> unsigned index,
> unsigned chan_index,
> LLVMValueRef pred,
> LLVMValueRef value)
> {
> - struct gallivm_state *gallivm = bld->base.gallivm;
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> + struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> const struct tgsi_full_dst_register *reg = &inst->Dst[index];
> struct lp_build_context *uint_bld = &bld->uint_bld;
> @@ -900,13 +827,13 @@ emit_store(
> break;
>
> case TGSI_SAT_ZERO_ONE:
> - value = lp_build_max(&bld->base, value, bld->base.zero);
> - value = lp_build_min(&bld->base, value, bld->base.one);
> + value = lp_build_max(&bld->bld_base.base, value,
> bld->bld_base.base.zero);
> + value = lp_build_min(&bld->bld_base.base, value,
> bld->bld_base.base.one);
> break;
>
> case TGSI_SAT_MINUS_PLUS_ONE:
> - value = lp_build_max(&bld->base, value,
> lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
> - value = lp_build_min(&bld->base, value, bld->base.one);
> + value = lp_build_max(&bld->bld_base.base, value,
> lp_build_const_vec(bld->bld_base.base.gallivm,
> bld->bld_base.base.type, -1.0));
> + value = lp_build_min(&bld->bld_base.base, value,
> bld->bld_base.base.one);
> break;
>
> default:
> @@ -919,7 +846,8 @@ emit_store(
> reg->Register.Index,
> &reg->Indirect);
> } else {
> - assert(reg->Register.Index <=
> bld->info->file_max[reg->Register.File]);
> + assert(reg->Register.Index <=
> +
> bld->bld_base.info->file_max[reg->Register.File]);
> }
>
> switch( reg->Register.File ) {
> @@ -928,7 +856,7 @@ emit_store(
> LLVMValueRef chan_vec =
> lp_build_const_int_vec(gallivm, uint_bld->type,
> chan_index);
> LLVMValueRef length_vec =
> - lp_build_const_int_vec(gallivm, uint_bld->type,
> bld->base.type.length);
> + lp_build_const_int_vec(gallivm, uint_bld->type,
> bld->bld_base.base.type.length);
> LLVMValueRef index_vec; /* indexes into the temp registers
> */
> LLVMValueRef outputs_array;
> LLVMValueRef pixel_offsets;
> @@ -937,7 +865,7 @@ emit_store(
>
> /* build pixel offset vector: {0, 1, 2, 3, ...} */
> pixel_offsets = uint_bld->undef;
> - for (i = 0; i < bld->base.type.length; i++) {
> + for (i = 0; i < bld->bld_base.base.type.length; i++) {
> LLVMValueRef ii = lp_build_const_int32(gallivm, i);
> pixel_offsets = LLVMBuildInsertElement(builder,
> pixel_offsets,
> ii, ii, "");
> @@ -959,7 +887,7 @@ emit_store(
> &bld->exec_mask, pred);
> }
> else {
> - LLVMValueRef out_ptr = get_output_ptr(bld,
> reg->Register.Index,
> + LLVMValueRef out_ptr = lp_get_output_ptr(bld,
> reg->Register.Index,
> chan_index);
> lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
> }
> @@ -971,7 +899,7 @@ emit_store(
> lp_build_const_int_vec(gallivm, uint_bld->type,
> chan_index);
> LLVMValueRef length_vec =
> lp_build_const_int_vec(gallivm, uint_bld->type,
> - bld->base.type.length);
> + bld->bld_base.base.type.length);
> LLVMValueRef index_vec; /* indexes into the temp registers
> */
> LLVMValueRef temps_array;
> LLVMValueRef pixel_offsets;
> @@ -980,7 +908,7 @@ emit_store(
>
> /* build pixel offset vector: {0, 1, 2, 3, ...} */
> pixel_offsets = uint_bld->undef;
> - for (i = 0; i < bld->base.type.length; i++) {
> + for (i = 0; i < bld->bld_base.base.type.length; i++) {
> LLVMValueRef ii = lp_build_const_int32(gallivm, i);
> pixel_offsets = LLVMBuildInsertElement(builder,
> pixel_offsets,
> ii, ii, "");
> @@ -1002,7 +930,7 @@ emit_store(
> &bld->exec_mask, pred);
> }
> else {
> - LLVMValueRef temp_ptr = get_temp_ptr(bld,
> reg->Register.Index,
> + LLVMValueRef temp_ptr = lp_get_temp_ptr_soa(bld,
> reg->Register.Index,
> chan_index);
> lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
> }
> @@ -1023,6 +951,27 @@ emit_store(
> }
> }
>
> +static void
> +emit_store(
> + struct lp_build_tgsi_context * bld_base,
> + const struct tgsi_full_instruction * inst,
> + const struct tgsi_opcode_info * info,
> + LLVMValueRef dst[4])
> +
> +{
> + unsigned chan_index;
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +
> + if(info->num_dst) {
> + LLVMValueRef pred[TGSI_NUM_CHANNELS];
> +
> + emit_fetch_predicate( bld, inst, pred );
> +
> + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> + emit_store_chan(bld_base, inst, 0, chan_index,
> pred[chan_index], dst[chan_index]);
> + }
> + }
> +}
>
> /**
> * High-level instruction translators.
> @@ -1034,7 +983,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
> enum lp_build_tex_modifier modifier,
> LLVMValueRef *texel)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> unsigned unit;
> LLVMValueRef lod_bias, explicit_lod;
> LLVMValueRef oow = NULL;
> @@ -1047,7 +996,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
> if (!bld->sampler) {
> _debug_printf("warning: found texture instruction but no
> sampler generator supplied\n");
> for (i = 0; i < 4; i++) {
> - texel[i] = bld->base.undef;
> + texel[i] = bld->bld_base.base.undef;
> }
> return;
> }
> @@ -1079,12 +1028,12 @@ emit_tex( struct lp_build_tgsi_soa_context
> *bld,
> }
>
> if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
> - lod_bias = emit_fetch( bld, inst, 0, 3 );
> + lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
> explicit_lod = NULL;
> }
> else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
> lod_bias = NULL;
> - explicit_lod = emit_fetch( bld, inst, 0, 3 );
> + explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3
> );
> }
> else {
> lod_bias = NULL;
> @@ -1092,43 +1041,43 @@ emit_tex( struct lp_build_tgsi_soa_context
> *bld,
> }
>
> if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
> - oow = emit_fetch( bld, inst, 0, 3 );
> - oow = lp_build_rcp(&bld->base, oow);
> + oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
> + oow = lp_build_rcp(&bld->bld_base.base, oow);
> }
>
> for (i = 0; i < num_coords; i++) {
> - coords[i] = emit_fetch( bld, inst, 0, i );
> + coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
> if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
> - coords[i] = lp_build_mul(&bld->base, coords[i], oow);
> + coords[i] = lp_build_mul(&bld->bld_base.base, coords[i],
> oow);
> }
> for (i = num_coords; i < 3; i++) {
> - coords[i] = bld->base.undef;
> + coords[i] = bld->bld_base.base.undef;
> }
>
> if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
> - LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm,
> 0);
> + LLVMValueRef index0 =
> lp_build_const_int32(bld->bld_base.base.gallivm, 0);
> for (i = 0; i < num_coords; i++) {
> - LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
> - LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
> + LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base,
> inst, 1, i );
> + LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base,
> inst, 2, i );
> ddx[i] = LLVMBuildExtractElement(builder, src1, index0,
> "");
> ddy[i] = LLVMBuildExtractElement(builder, src2, index0,
> "");
> }
> unit = inst->Src[3].Register.Index;
> } else {
> for (i = 0; i < num_coords; i++) {
> - ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
> - ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
> + ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base,
> coords[i] );
> + ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base,
> coords[i] );
> }
> unit = inst->Src[1].Register.Index;
> }
> for (i = num_coords; i < 3; i++) {
> - ddx[i] = LLVMGetUndef(bld->base.elem_type);
> - ddy[i] = LLVMGetUndef(bld->base.elem_type);
> + ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
> + ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
> }
>
> bld->sampler->emit_fetch_texel(bld->sampler,
> - bld->base.gallivm,
> - bld->base.type,
> + bld->bld_base.base.gallivm,
> + bld->bld_base.base.type,
> unit, num_coords, coords,
> ddx, ddy,
> lod_bias, explicit_lod,
> @@ -1144,10 +1093,10 @@ near_end_of_shader(struct
> lp_build_tgsi_soa_context *bld,
> for (i = 0; i < 5; i++) {
> unsigned opcode;
>
> - if (pc + i >= bld->info->num_instructions)
> + if (pc + i >= bld->bld_base.info->num_instructions)
> return TRUE;
>
> - opcode = bld->instructions[pc + i].Instruction.Opcode;
> + opcode = bld->bld_base.instructions[pc +
> i].Instruction.Opcode;
>
> if (opcode == TGSI_OPCODE_END)
> return TRUE;
> @@ -1182,9 +1131,9 @@ emit_kil(
> const struct tgsi_full_instruction *inst,
> int pc)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> const struct tgsi_full_src_register *reg = &inst->Src[0];
> - LLVMValueRef terms[NUM_CHANNELS];
> + LLVMValueRef terms[TGSI_NUM_CHANNELS];
> LLVMValueRef mask;
> unsigned chan_index;
>
> @@ -1197,10 +1146,10 @@ emit_kil(
> swizzle = tgsi_util_get_full_src_register_swizzle( reg,
> chan_index );
>
> /* Check if the component has not been already tested. */
> - assert(swizzle < NUM_CHANNELS);
> + assert(swizzle < TGSI_NUM_CHANNELS);
> if( !terms[swizzle] )
> /* TODO: change the comparison operator instead of setting
> the sign */
> - terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
> + terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst,
> 0, chan_index );
> }
>
> mask = NULL;
> @@ -1211,7 +1160,7 @@ emit_kil(
> /*
> * If term < 0 then mask = 0 else mask = ~0.
> */
> - chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL,
> terms[chan_index], bld->base.zero);
> + chan_mask = lp_build_cmp(&bld->bld_base.base,
> PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
>
> if(mask)
> mask = LLVMBuildAnd(builder, mask, chan_mask, "");
> @@ -1237,10 +1186,9 @@ emit_kil(
> */
> static void
> emit_kilp(struct lp_build_tgsi_soa_context *bld,
> - const struct tgsi_full_instruction *inst,
> - int pc)
> + int pc)
> {
> - LLVMBuilderRef builder = bld->base.gallivm->builder;
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> LLVMValueRef mask;
>
> /* For those channels which are "alive", disable fragment shader
> @@ -1250,7 +1198,7 @@ emit_kilp(struct lp_build_tgsi_soa_context
> *bld,
> mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask,
> "kilp");
> }
> else {
> - LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
> + LLVMValueRef zero =
> LLVMConstNull(bld->bld_base.base.int_vec_type);
> mask = zero;
> }
>
> @@ -1268,7 +1216,7 @@ emit_kilp(struct lp_build_tgsi_soa_context
> *bld,
> static void
> emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
> {
> - struct gallivm_state *gallivm = bld->base.gallivm;
> + struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> LLVMValueRef temp_ptr;
> LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
> @@ -1276,7 +1224,7 @@ emit_dump_temps(struct
> lp_build_tgsi_soa_context *bld)
> LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
> LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
> int index;
> - int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
> + int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
>
> for (index = 0; index < n; index++) {
> LLVMValueRef idx = lp_build_const_int32(gallivm, index);
> @@ -1286,7 +1234,7 @@ emit_dump_temps(struct
> lp_build_tgsi_soa_context *bld)
> lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
>
> for (chan = 0; chan < 4; chan++) {
> - temp_ptr = get_temp_ptr(bld, index, chan);
> + temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
> res = LLVMBuildLoad(builder, temp_ptr, "");
> v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
> v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
> @@ -1307,31 +1255,32 @@ emit_dump_temps(struct
> lp_build_tgsi_soa_context *bld)
>
>
>
> -static void
> -emit_declaration(
> - struct lp_build_tgsi_soa_context *bld,
> +void
> +lp_emit_declaration_soa(
> + struct lp_build_tgsi_context *bld_base,
> const struct tgsi_full_declaration *decl)
> {
> - struct gallivm_state *gallivm = bld->base.gallivm;
> - LLVMTypeRef vec_type = bld->base.vec_type;
> + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
> + struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> + LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
> const unsigned first = decl->Range.First;
> const unsigned last = decl->Range.Last;
> unsigned idx, i;
>
> for (idx = first; idx <= last; ++idx) {
> - assert(last <= bld->info->file_max[decl->Declaration.File]);
> + assert(last <=
> bld->bld_base.info->file_max[decl->Declaration.File]);
> switch (decl->Declaration.File) {
> case TGSI_FILE_TEMPORARY:
> assert(idx < LP_MAX_TGSI_TEMPS);
> if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
> - for (i = 0; i < NUM_CHANNELS; i++)
> + for (i = 0; i < TGSI_NUM_CHANNELS; i++)
> bld->temps[idx][i] = lp_build_alloca(gallivm,
> vec_type, "temp");
> }
> break;
>
> case TGSI_FILE_OUTPUT:
> if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
> - for (i = 0; i < NUM_CHANNELS; i++)
> + for (i = 0; i < TGSI_NUM_CHANNELS; i++)
> bld->outputs[idx][i] = lp_build_alloca(gallivm,
> vec_type,
> "output");
> }
> @@ -1339,13 +1288,13 @@ emit_declaration(
>
> case TGSI_FILE_ADDRESS:
> assert(idx < LP_MAX_TGSI_ADDRS);
> - for (i = 0; i < NUM_CHANNELS; i++)
> + for (i = 0; i < TGSI_NUM_CHANNELS; i++)
> bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type,
> "addr");
> break;
>
> case TGSI_FILE_PREDICATE:
> assert(idx < LP_MAX_TGSI_PREDS);
> - for (i = 0; i < NUM_CHANNELS; i++)
> + for (i = 0; i < TGSI_NUM_CHANNELS; i++)
> bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
> "predicate");
> break;
> @@ -1358,965 +1307,427 @@ emit_declaration(
> }
>
>
> -/**
> - * Emit LLVM for one TGSI instruction.
> - * \param return TRUE for success, FALSE otherwise
> - */
> -static boolean
> -emit_instruction(
> - struct lp_build_tgsi_soa_context *bld,
> - const struct tgsi_full_instruction *inst,
> - const struct tgsi_opcode_info *info,
> - int *pc)
> +void lp_emit_immediate_soa(
> + struct lp_build_tgsi_context *bld_base,
> + const struct tgsi_full_immediate *imm)
> {
> - unsigned chan_index;
> - LLVMValueRef src0, src1, src2;
> - LLVMValueRef tmp0, tmp1, tmp2;
> - LLVMValueRef tmp3 = NULL;
> - LLVMValueRef tmp4 = NULL;
> - LLVMValueRef tmp5 = NULL;
> - LLVMValueRef tmp6 = NULL;
> - LLVMValueRef tmp7 = NULL;
> - LLVMValueRef res;
> - LLVMValueRef dst0[NUM_CHANNELS];
> + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
> + struct gallivm_state * gallivm = bld_base->base.gallivm;
>
> - /*
> - * Stores and write masks are handled in a general fashion after
> the long
> - * instruction opcode switch statement.
> - *
> - * Although not stricitly necessary, we avoid generating
> instructions for
> - * channels which won't be stored, in cases where's that easy.
> For some
> - * complex instructions, like texture sampling, it is more
> convenient to
> - * assume a full writemask and then let LLVM optimization passes
> eliminate
> - * redundant code.
> - */
> + /* simply copy the immediate values into the next immediates[]
> slot */
> + unsigned i;
> + const uint size = imm->Immediate.NrTokens - 1;
> + assert(size <= 4);
> + assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
>
> - (*pc)++;
> + for( i = 0; i < size; ++i )
> + bld->immediates[bld->num_immediates][i] =
> + lp_build_const_vec(gallivm, bld_base->base.type,
> imm->u[i].Float);
>
> - assert(info->num_dst <= 1);
> - if (info->num_dst) {
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = bld->base.undef;
> - }
> - }
> + for( i = size; i < 4; ++i )
> + bld->immediates[bld->num_immediates][i] =
> bld_base->base.undef;
>
> - switch (inst->Instruction.Opcode) {
> - case TGSI_OPCODE_ARL:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - tmp0 = lp_build_floor(&bld->base, tmp0);
> - dst0[chan_index] = tmp0;
> - }
> - break;
> + bld->num_immediates++;
> +}
>
> - case TGSI_OPCODE_MOV:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
> - }
> - break;
> +static void
> +ddx_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_LIT:
> - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ) {
> - dst0[TGSI_CHAN_X] = bld->base.one;
> - }
> - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
> - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - dst0[TGSI_CHAN_Y] = lp_build_max( &bld->base, src0,
> bld->base.zero);
> - }
> - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
> - /* XMM[1] = SrcReg[0].yyyy */
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> - /* XMM[1] = max(XMM[1], 0) */
> - tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
> - /* XMM[2] = SrcReg[0].wwww */
> - tmp2 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
> - tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0,
> bld->base.zero);
> - dst0[TGSI_CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1,
> bld->base.zero);
> - }
> - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) ) {
> - dst0[TGSI_CHAN_W] = bld->base.one;
> - }
> - break;
> + emit_fetch_deriv(bld, emit_data->args[0], NULL,
> + &emit_data->output[emit_data->chan], NULL);
> +}
>
> - case TGSI_OPCODE_RCP:
> - /* TGSI_OPCODE_RECIP */
> - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - res = lp_build_rcp(&bld->base, src0);
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = res;
> - }
> - break;
> +static void
> +ddy_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_RSQ:
> - /* TGSI_OPCODE_RECIPSQRT */
> - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - src0 = lp_build_abs(&bld->base, src0);
> - res = lp_build_rsqrt(&bld->base, src0);
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = res;
> - }
> - break;
> + emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
> + &emit_data->output[emit_data->chan]);
> +}
>
> - case TGSI_OPCODE_EXP:
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
> - LLVMValueRef *p_exp2_int_part = NULL;
> - LLVMValueRef *p_frac_part = NULL;
> - LLVMValueRef *p_exp2 = NULL;
> -
> - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
> - p_exp2_int_part = &tmp0;
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
> - p_frac_part = &tmp1;
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
> - p_exp2 = &tmp2;
> -
> - lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part,
> p_frac_part, p_exp2);
> -
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
> - dst0[TGSI_CHAN_X] = tmp0;
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
> - dst0[TGSI_CHAN_Y] = tmp1;
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
> - dst0[TGSI_CHAN_Z] = tmp2;
> - }
> - /* dst.w = 1.0 */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
> - dst0[TGSI_CHAN_W] = bld->base.one;
> - }
> - break;
> +static void
> +kilp_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_LOG:
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
> - LLVMValueRef *p_floor_log2 = NULL;
> - LLVMValueRef *p_exp = NULL;
> - LLVMValueRef *p_log2 = NULL;
> -
> - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - src0 = lp_build_abs( &bld->base, src0 );
> -
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
> - p_floor_log2 = &tmp0;
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
> - p_exp = &tmp1;
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
> - p_log2 = &tmp2;
> -
> - lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2,
> p_log2);
> -
> - /* dst.x = floor(lg2(abs(src.x))) */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
> - dst0[TGSI_CHAN_X] = tmp0;
> - /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) {
> - dst0[TGSI_CHAN_Y] = lp_build_div( &bld->base, src0,
> tmp1);
> - }
> - /* dst.z = lg2(abs(src.x)) */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
> - dst0[TGSI_CHAN_Z] = tmp2;
> - }
> - /* dst.w = 1.0 */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
> - dst0[TGSI_CHAN_W] = bld->base.one;
> - }
> - break;
> + emit_kilp(bld, bld_base->pc - 1);
> +}
>
> - case TGSI_OPCODE_MUL:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
> - }
> - break;
> +static void
> +kil_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_ADD:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
> - }
> - break;
> + emit_kil(bld, emit_data->inst, bld_base->pc - 1);
> +}
>
> - case TGSI_OPCODE_DP3:
> - /* TGSI_OPCODE_DOT3 */
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0;
> - }
> - break;
> +static void
> +tex_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_DP4:
> - /* TGSI_OPCODE_DOT4 */
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0;
> - }
> - break;
> + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
> emit_data->output);
> +}
>
> - case TGSI_OPCODE_DST:
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
> - dst0[TGSI_CHAN_X] = bld->base.one;
> - }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> - dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
> - }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
> - dst0[TGSI_CHAN_Z] = emit_fetch( bld, inst, 0, TGSI_CHAN_Z
> );
> - }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
> - dst0[TGSI_CHAN_W] = emit_fetch( bld, inst, 1, TGSI_CHAN_W
> );
> - }
> - break;
> +static void
> +txb_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_MIN:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
> - }
> - break;
> + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
> + emit_data->output);
> +}
>
> - case TGSI_OPCODE_MAX:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
> - }
> - break;
> +static void
> +txd_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_SLT:
> - /* TGSI_OPCODE_SETLT */
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1
> );
> - dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> - }
> - break;
> + emit_tex(bld, emit_data->inst,
> LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
> + emit_data->output);
> +}
>
> - case TGSI_OPCODE_SGE:
> - /* TGSI_OPCODE_SETGE */
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0,
> src1 );
> - dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> - }
> - break;
> +static void
> +txl_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_MAD:
> - /* TGSI_OPCODE_MADD */
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - tmp1 = emit_fetch( bld, inst, 1, chan_index );
> - tmp2 = emit_fetch( bld, inst, 2, chan_index );
> - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
> - dst0[chan_index] = tmp0;
> - }
> - break;
> + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
> + emit_data->output);
> +}
>
> - case TGSI_OPCODE_SUB:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - tmp1 = emit_fetch( bld, inst, 1, chan_index );
> - dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
> - }
> - break;
> +static void
> +txp_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_LRP:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - src2 = emit_fetch( bld, inst, 2, chan_index );
> - tmp0 = lp_build_sub( &bld->base, src1, src2 );
> - tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
> - dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
> - }
> - break;
> + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
> + emit_data->output);
> +}
>
> - case TGSI_OPCODE_CND:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - src2 = emit_fetch( bld, inst, 2, chan_index );
> - tmp1 = lp_build_const_vec(bld->base.gallivm,
> bld->base.type, 0.5);
> - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2,
> tmp1);
> - dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0,
> src1 );
> - }
> - break;
> -
> - case TGSI_OPCODE_DP2A:
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 =
> src[0].x */
> - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 =
> src[1].x */
> - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /*
> xmm0 = xmm0 * xmm1 */
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 =
> src[0].y */
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 =
> src[1].y */
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /*
> xmm1 = xmm1 * xmm2 */
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /*
> xmm0 = xmm0 + xmm1 */
> - tmp1 = emit_fetch( bld, inst, 2, TGSI_CHAN_X ); /* xmm1 =
> src[2].x */
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /*
> xmm0 = xmm0 + xmm1 */
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
> - }
> - break;
> +static void
> +cal_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_FRC:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - tmp0 = lp_build_floor(&bld->base, src0);
> - tmp0 = lp_build_sub(&bld->base, src0, tmp0);
> - dst0[chan_index] = tmp0;
> - }
> - break;
> + lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
> + &bld_base->pc);
> +}
>
> - case TGSI_OPCODE_CLAMP:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - src2 = emit_fetch( bld, inst, 2, chan_index );
> - tmp0 = lp_build_max(&bld->base, tmp0, src1);
> - tmp0 = lp_build_min(&bld->base, tmp0, src2);
> - dst0[chan_index] = tmp0;
> - }
> - break;
> +static void
> +ret_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_FLR:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
> - }
> - break;
> + lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
> +}
>
> - case TGSI_OPCODE_ROUND:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - dst0[chan_index] = lp_build_round(&bld->base, tmp0);
> - }
> - break;
> +static void
> +brk_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_EX2: {
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - tmp0 = lp_build_exp2( &bld->base, tmp0);
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0;
> - }
> - break;
> - }
> + lp_exec_break(&bld->exec_mask);
> +}
>
> - case TGSI_OPCODE_LG2:
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - tmp0 = lp_build_log2( &bld->base, tmp0);
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0;
> - }
> - break;
> +static void
> +if_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp;
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_POW:
> - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - src1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> - res = lp_build_pow( &bld->base, src0, src1 );
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = res;
> - }
> - break;
> + tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
> + emit_data->args[0], bld->bld_base.base.zero);
> + lp_exec_mask_cond_push(&bld->exec_mask, tmp);
> +}
>
> - case TGSI_OPCODE_XPD:
> - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
> - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
> - tmp3 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
> - }
> - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> - tmp4 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> - }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
> - tmp2 = tmp0;
> - tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
> - tmp5 = tmp3;
> - tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
> - tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
> - dst0[TGSI_CHAN_X] = tmp2;
> - }
> - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> - tmp5 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
> - tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
> - tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
> - dst0[TGSI_CHAN_Y] = tmp3;
> - }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
> - tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
> - tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
> - tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
> - dst0[TGSI_CHAN_Z] = tmp5;
> - }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
> - dst0[TGSI_CHAN_W] = bld->base.one;
> - }
> - break;
> +static void
> +bgnloop_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_ABS:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
> - }
> - break;
> + lp_exec_bgnloop(&bld->exec_mask);
> +}
>
> - case TGSI_OPCODE_RCC:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> -
> - case TGSI_OPCODE_DPH:
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0;
> - }
> - break;
> +static void
> +bgnsub_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_COS:
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - tmp0 = lp_build_cos( &bld->base, tmp0 );
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0;
> - }
> - break;
> + lp_exec_mask_bgnsub(&bld->exec_mask);
> +}
>
> - case TGSI_OPCODE_DDX:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - emit_fetch_deriv( bld, inst, 0, chan_index, NULL,
> &dst0[chan_index], NULL);
> - }
> - break;
> +static void
> +else_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_DDY:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL,
> &dst0[chan_index]);
> - }
> - break;
> + lp_exec_mask_cond_invert(&bld->exec_mask);
> +}
>
> - case TGSI_OPCODE_KILP:
> - /* predicated kill */
> - emit_kilp( bld, inst, (*pc)-1 );
> - break;
> +static void
> +endif_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_KIL:
> - /* conditional kill */
> - emit_kil( bld, inst, (*pc)-1 );
> - break;
> + lp_exec_mask_cond_pop(&bld->exec_mask);
> +}
>
> - case TGSI_OPCODE_PK2H:
> - return FALSE;
> - break;
> +static void
> +endloop_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_PK2US:
> - return FALSE;
> - break;
> + lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
> +}
>
> - case TGSI_OPCODE_PK4B:
> - return FALSE;
> - break;
> +static void
> +endsub_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_PK4UB:
> - return FALSE;
> - break;
> + lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
> +}
>
> - case TGSI_OPCODE_RFL:
> - return FALSE;
> - break;
> +static void
> +cont_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_SEQ:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0,
> src1 );
> - dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> - }
> - break;
> + lp_exec_continue(&bld->exec_mask);
> +}
>
> - case TGSI_OPCODE_SFL:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = bld->base.zero;
> - }
> - break;
> +/* XXX: Refactor and move it to lp_bld_action.c
> + *
> + * XXX: What do the comments about xmm registers mean? Maybe they
> are left over
> + * from old code, but there is no guarantee that LLVM will use those
> registers
> + * for this code.
> + *
> + * XXX: There should be no calls to lp_build_emit_fetch in this
> function. This
> + * should be handled by the emit_data->fetch_args function. */
> +static void
> +nrm_emit(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMValueRef tmp0, tmp1;
> + LLVMValueRef tmp4 = NULL;
> + LLVMValueRef tmp5 = NULL;
> + LLVMValueRef tmp6 = NULL;
> + LLVMValueRef tmp7 = NULL;
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - case TGSI_OPCODE_SGT:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0,
> src1 );
> - dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> - }
> - break;
> + uint dims = (emit_data->inst->Instruction.Opcode ==
> TGSI_OPCODE_NRM) ? 3 : 4;
>
> - case TGSI_OPCODE_SIN:
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - tmp0 = lp_build_sin( &bld->base, tmp0 );
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0;
> - }
> - break;
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
> + TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
> + TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
> + (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) &&
> dims == 4)) {
>
> - case TGSI_OPCODE_SLE:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0,
> src1 );
> - dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> - }
> - break;
> + /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt()
> above). */
>
> - case TGSI_OPCODE_SNE:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0,
> src1 );
> - dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> + /* xmm4 = src.x */
> + /* xmm0 = src.x * src.x */
> + tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0,
> TGSI_CHAN_X);
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_X)) {
> + tmp4 = tmp0;
> }
> - break;
> + tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
>
> - case TGSI_OPCODE_STR:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = bld->base.one;
> + /* xmm5 = src.y */
> + /* xmm0 = xmm0 + src.y * src.y */
> + tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0,
> TGSI_CHAN_Y);
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_Y)) {
> + tmp5 = tmp1;
> }
> - break;
> -
> - case TGSI_OPCODE_TEX:
> - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
> - break;
> -
> - case TGSI_OPCODE_TXD:
> - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0
> );
> - break;
> -
> - case TGSI_OPCODE_UP2H:
> - /* deprecated */
> - assert (0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_UP2US:
> - /* deprecated */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_UP4B:
> - /* deprecated */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_UP4UB:
> - /* deprecated */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_X2D:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_ARA:
> - /* deprecated */
> - assert(0);
> - return FALSE;
> - break;
> + tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
> + tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
>
> - case TGSI_OPCODE_ARR:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - tmp0 = lp_build_round(&bld->base, tmp0);
> - dst0[chan_index] = tmp0;
> + /* xmm6 = src.z */
> + /* xmm0 = xmm0 + src.z * src.z */
> + tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0,
> TGSI_CHAN_Z);
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_Z)) {
> + tmp6 = tmp1;
> }
> - break;
> -
> - case TGSI_OPCODE_BRA:
> - /* deprecated */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_CAL:
> - lp_exec_mask_call(&bld->exec_mask,
> - inst->Label.Label,
> - pc);
> + tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
> + tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
>
> - break;
> -
> - case TGSI_OPCODE_RET:
> - lp_exec_mask_ret(&bld->exec_mask, pc);
> - break;
> -
> - case TGSI_OPCODE_END:
> - if (0) {
> - /* for debugging */
> - emit_dump_temps(bld);
> + if (dims == 4) {
> + /* xmm7 = src.w */
> + /* xmm0 = xmm0 + src.w * src.w */
> + tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst,
> 0, TGSI_CHAN_W);
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_W)) {
> + tmp7 = tmp1;
> + }
> + tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
> + tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
> }
> - *pc = -1;
> - break;
> -
> - case TGSI_OPCODE_SSG:
> - /* TGSI_OPCODE_SGN */
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
> + /* xmm1 = 1 / sqrt(xmm0) */
> + tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
> + /* dst.x = xmm1 * src.x */
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_X)) {
> + emit_data->output[TGSI_CHAN_X] = lp_build_mul(
> &bld->bld_base.base, tmp4, tmp1);
> }
> - break;
> -
> - case TGSI_OPCODE_CMP:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - src0 = emit_fetch( bld, inst, 0, chan_index );
> - src1 = emit_fetch( bld, inst, 1, chan_index );
> - src2 = emit_fetch( bld, inst, 2, chan_index );
> - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0,
> bld->base.zero );
> - dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1,
> src2);
> + /* dst.y = xmm1 * src.y */
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_Y)) {
> + emit_data->output[TGSI_CHAN_Y] = lp_build_mul(
> &bld->bld_base.base, tmp5, tmp1);
> }
> - break;
>
> - case TGSI_OPCODE_SCS:
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - dst0[TGSI_CHAN_X] = lp_build_cos( &bld->base, tmp0 );
> - }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> - dst0[TGSI_CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
> + /* dst.z = xmm1 * src.z */
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_Z)) {
> + emit_data->output[TGSI_CHAN_Z] = lp_build_mul(
> &bld->bld_base.base, tmp6, tmp1);
> }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
> - dst0[TGSI_CHAN_Z] = bld->base.zero;
> + /* dst.w = xmm1 * src.w */
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)
> && dims == 4) {
> + emit_data->output[TGSI_CHAN_W] = lp_build_mul(
> &bld->bld_base.base, tmp7, tmp1);
> }
> - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
> - dst0[TGSI_CHAN_W] = bld->base.one;
> - }
> - break;
> -
> - case TGSI_OPCODE_TXB:
> - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
> - break;
> -
> - case TGSI_OPCODE_NRM:
> - /* fall-through */
> - case TGSI_OPCODE_NRM4:
> - /* 3 or 4-component normalization */
> - {
> - uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ?
> 3 : 4;
> -
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) ||
> - TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) ||
> - (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) &&
> dims == 4)) {
> -
> - /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt()
> above). */
> -
> - /* xmm4 = src.x */
> - /* xmm0 = src.x * src.x */
> - tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
> - tmp4 = tmp0;
> - }
> - tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
> -
> - /* xmm5 = src.y */
> - /* xmm0 = xmm0 + src.y * src.y */
> - tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Y);
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
> - tmp5 = tmp1;
> - }
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -
> - /* xmm6 = src.z */
> - /* xmm0 = xmm0 + src.z * src.z */
> - tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Z);
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
> - tmp6 = tmp1;
> - }
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -
> - if (dims == 4) {
> - /* xmm7 = src.w */
> - /* xmm0 = xmm0 + src.w * src.w */
> - tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_W);
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W))
> {
> - tmp7 = tmp1;
> - }
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> - }
> -
> - /* xmm1 = 1 / sqrt(xmm0) */
> - tmp1 = lp_build_rsqrt( &bld->base, tmp0);
> -
> - /* dst.x = xmm1 * src.x */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
> - dst0[TGSI_CHAN_X] = lp_build_mul( &bld->base, tmp4,
> tmp1);
> - }
> -
> - /* dst.y = xmm1 * src.y */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
> - dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp5,
> tmp1);
> - }
> -
> - /* dst.z = xmm1 * src.z */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
> - dst0[TGSI_CHAN_Z] = lp_build_mul( &bld->base, tmp6,
> tmp1);
> - }
> -
> - /* dst.w = xmm1 * src.w */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) &&
> dims == 4) {
> - dst0[TGSI_CHAN_W] = lp_build_mul( &bld->base, tmp7,
> tmp1);
> - }
> - }
> -
> - /* dst.w = 1.0 */
> - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims
> == 3) {
> - dst0[TGSI_CHAN_W] = bld->base.one;
> - }
> - }
> - break;
> -
> - case TGSI_OPCODE_DIV:
> - /* deprecated */
> - assert( 0 );
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_DP2:
> - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 =
> src[0].x */
> - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 =
> src[1].x */
> - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /*
> xmm0 = xmm0 * xmm1 */
> - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 =
> src[0].y */
> - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 =
> src[1].y */
> - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /*
> xmm1 = xmm1 * xmm2 */
> - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /*
> xmm0 = xmm0 + xmm1 */
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
> - }
> - break;
> -
> - case TGSI_OPCODE_TXL:
> - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
> - break;
> -
> - case TGSI_OPCODE_TXP:
> - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
> - break;
> -
> - case TGSI_OPCODE_BRK:
> - lp_exec_break(&bld->exec_mask);
> - break;
> -
> - case TGSI_OPCODE_IF:
> - tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
> - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
> - tmp0, bld->base.zero);
> - lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
> - break;
> -
> - case TGSI_OPCODE_BGNLOOP:
> - lp_exec_bgnloop(&bld->exec_mask);
> - break;
> -
> - case TGSI_OPCODE_BGNSUB:
> - lp_exec_mask_bgnsub(&bld->exec_mask);
> - break;
> -
> - case TGSI_OPCODE_ELSE:
> - lp_exec_mask_cond_invert(&bld->exec_mask);
> - break;
> -
> - case TGSI_OPCODE_ENDIF:
> - lp_exec_mask_cond_pop(&bld->exec_mask);
> - break;
> -
> - case TGSI_OPCODE_ENDLOOP:
> - lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
> - break;
> + }
>
> - case TGSI_OPCODE_ENDSUB:
> - lp_exec_mask_endsub(&bld->exec_mask, pc);
> - break;
> + /* dst.w = 1.0 */
> + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) &&
> dims == 3) {
> + emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
> + }
> +}
>
> - case TGSI_OPCODE_PUSHA:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> +static void emit_prologue(struct lp_build_tgsi_context * bld_base)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> + struct gallivm_state * gallivm = bld_base->base.gallivm;
>
> - case TGSI_OPCODE_POPA:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> + LLVMValueRef array_size =
> + lp_build_const_int32(gallivm,
> +
> bld_base->info->file_max[TGSI_FILE_TEMPORARY]
> * 4 + 4);
> + bld->temps_array = lp_build_array_alloca(gallivm,
> +
> bld_base->base.vec_type,
> array_size,
> + "temp_array");
> + }
>
> - case TGSI_OPCODE_CEIL:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
> - }
> - break;
> + if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> + LLVMValueRef array_size =
> + lp_build_const_int32(gallivm,
> +
> bld_base->info->file_max[TGSI_FILE_OUTPUT]
> * 4 + 4);
> + bld->outputs_array = lp_build_array_alloca(gallivm,
> +
> bld_base->base.vec_type,
> array_size,
> + "output_array");
> + }
>
> - case TGSI_OPCODE_I2F:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> + /* If we have indirect addressing in inputs we need to copy them
> into
> + * our alloca array to be able to iterate over them */
> + if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
> + unsigned index, chan;
> + LLVMTypeRef vec_type = bld_base->base.vec_type;
> + LLVMValueRef array_size = lp_build_const_int32(gallivm,
> + bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
> + bld->inputs_array = lp_build_array_alloca(gallivm,
> + vec_type, array_size,
> + "input_array");
>
> - case TGSI_OPCODE_NOT:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> + assert(bld_base->info->num_inputs
> + <= bld_base->info->file_max[TGSI_FILE_INPUT]
> + 1);
>
> - case TGSI_OPCODE_TRUNC:
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - tmp0 = emit_fetch( bld, inst, 0, chan_index );
> - dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
> + for (index = 0; index < bld_base->info->num_inputs; ++index) {
> + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
> + LLVMValueRef lindex =
> + lp_build_const_int32(gallivm, index * 4 + chan);
> + LLVMValueRef input_ptr =
> + LLVMBuildGEP(gallivm->builder, bld->inputs_array,
> + &lindex, 1, "");
> + LLVMValueRef value = bld->inputs[index][chan];
> + if (value)
> + LLVMBuildStore(gallivm->builder, value, input_ptr);
> + }
> }
> - break;
> -
> - case TGSI_OPCODE_SHL:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_ISHR:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_AND:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_OR:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_MOD:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_XOR:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_SAD:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_TXF:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_TXQ:
> - /* deprecated? */
> - assert(0);
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_CONT:
> - lp_exec_continue(&bld->exec_mask);
> - break;
> -
> - case TGSI_OPCODE_EMIT:
> - return FALSE;
> - break;
> -
> - case TGSI_OPCODE_ENDPRIM:
> - return FALSE;
> - break;
> + }
> +}
>
> - case TGSI_OPCODE_NOP:
> - break;
> +static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
> +{
> + struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>
> - default:
> - return FALSE;
> + if (0) {
> + /* for debugging */
> + emit_dump_temps(bld);
> }
> -
> - if(info->num_dst) {
> - LLVMValueRef pred[NUM_CHANNELS];
>
> - emit_fetch_predicate( bld, inst, pred );
> -
> - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> - emit_store( bld, inst, 0, chan_index, pred[chan_index],
> dst0[chan_index]);
> + /* If we have indirect addressing in outputs we need to copy our
> alloca array
> + * to the outputs slots specified by the caller */
> + if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> + unsigned index, chan;
> + assert(bld_base->info->num_outputs <=
> + bld_base->info->file_max[TGSI_FILE_OUTPUT] +
> 1);
> + for (index = 0; index < bld_base->info->num_outputs; ++index)
> {
> + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
> + bld->outputs[index][chan] = lp_get_output_ptr(bld,
> index, chan);
> + }
> }
> }
> -
> - return TRUE;
> }
>
> -
> void
> lp_build_tgsi_soa(struct gallivm_state *gallivm,
> const struct tgsi_token *tokens,
> @@ -2325,17 +1736,12 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
> LLVMValueRef consts_ptr,
> LLVMValueRef system_values_array,
> const LLVMValueRef *pos,
> - const LLVMValueRef (*inputs)[NUM_CHANNELS],
> - LLVMValueRef (*outputs)[NUM_CHANNELS],
> + const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
> + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
> struct lp_build_sampler_soa *sampler,
> const struct tgsi_shader_info *info)
> {
> struct lp_build_tgsi_soa_context bld;
> - struct tgsi_parse_context parse;
> - uint num_immediates = 0;
> - uint num_instructions = 0;
> - unsigned i;
> - int pc = 0;
>
> struct lp_type res_type;
>
> @@ -2347,7 +1753,7 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
>
> /* Setup build context */
> memset(&bld, 0, sizeof bld);
> - lp_build_context_init(&bld.base, gallivm, type);
> + lp_build_context_init(&bld.bld_base.base, gallivm, type);
> lp_build_context_init(&bld.uint_bld, gallivm,
> lp_uint_type(type));
> lp_build_context_init(&bld.elem_bld, gallivm,
> lp_elem_type(type));
> bld.mask = mask;
> @@ -2356,145 +1762,55 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
> bld.outputs = outputs;
> bld.consts_ptr = consts_ptr;
> bld.sampler = sampler;
> - bld.info = info;
> + bld.bld_base.info = info;
> bld.indirect_files = info->indirect_files;
> - bld.instructions = (struct tgsi_full_instruction *)
> - MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct
> tgsi_full_instruction) );
> - bld.max_instructions = LP_MAX_INSTRUCTIONS;
>
> - if (!bld.instructions) {
> - return;
> - }
> + bld.bld_base.soa = TRUE;
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] =
> emit_fetch_constant;
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] =
> emit_fetch_immediate;
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] =
> emit_fetch_input;
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] =
> emit_fetch_temporary;
> + bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] =
> emit_fetch_system_value;
> + bld.bld_base.emit_store = emit_store;
> +
> + bld.bld_base.emit_declaration = lp_emit_declaration_soa;
> + bld.bld_base.emit_immediate = lp_emit_immediate_soa;
> +
> + bld.bld_base.emit_prologue = emit_prologue;
> + bld.bld_base.emit_epilogue = emit_epilogue;
> +
> + /* Set opcode actions */
> + lp_set_default_actions_cpu(&bld.bld_base);
> +
> + bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
> + bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
> +
> + lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
>
> - lp_exec_mask_init(&bld.exec_mask, &bld.base);
> -
> - if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> - LLVMValueRef array_size =
> - lp_build_const_int32(gallivm,
> - info->file_max[TGSI_FILE_TEMPORARY] *
> 4 + 4);
> - bld.temps_array = lp_build_array_alloca(gallivm,
> - bld.base.vec_type,
> array_size,
> - "temp_array");
> - }
> -
> - if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> - LLVMValueRef array_size =
> - lp_build_const_int32(gallivm,
> - info->file_max[TGSI_FILE_OUTPUT] * 4 +
> 4);
> - bld.outputs_array = lp_build_array_alloca(gallivm,
> - bld.base.vec_type,
> array_size,
> - "output_array");
> - }
> -
> - /* If we have indirect addressing in inputs we need to copy them
> into
> - * our alloca array to be able to iterate over them */
> - if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
> - unsigned index, chan;
> - LLVMTypeRef vec_type = bld.base.vec_type;
> - LLVMValueRef array_size =
> - lp_build_const_int32(gallivm,
> info->file_max[TGSI_FILE_INPUT]*4 + 4);
> - bld.inputs_array = lp_build_array_alloca(gallivm,
> - vec_type, array_size,
> - "input_array");
> -
> - assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] +
> 1);
> -
> - for (index = 0; index < info->num_inputs; ++index) {
> - for (chan = 0; chan < NUM_CHANNELS; ++chan) {
> - LLVMValueRef lindex =
> - lp_build_const_int32(gallivm, index * 4 + chan);
> - LLVMValueRef input_ptr =
> - LLVMBuildGEP(gallivm->builder, bld.inputs_array,
> - &lindex, 1, "");
> - LLVMValueRef value = bld.inputs[index][chan];
> - if (value)
> - LLVMBuildStore(gallivm->builder, value, input_ptr);
> - }
> - }
> - }
>
> bld.system_values_array = system_values_array;
>
> - tgsi_parse_init( &parse, tokens );
> -
> - while( !tgsi_parse_end_of_tokens( &parse ) ) {
> - tgsi_parse_token( &parse );
> -
> - switch( parse.FullToken.Token.Type ) {
> - case TGSI_TOKEN_TYPE_DECLARATION:
> - /* Inputs already interpolated */
> - emit_declaration( &bld, &parse.FullToken.FullDeclaration );
> - break;
> -
> - case TGSI_TOKEN_TYPE_INSTRUCTION:
> - {
> - /* save expanded instruction */
> - if (num_instructions == bld.max_instructions) {
> - struct tgsi_full_instruction *instructions;
> - instructions = REALLOC(bld.instructions,
> - bld.max_instructions
> - * sizeof(struct
> tgsi_full_instruction),
> - (bld.max_instructions +
> LP_MAX_INSTRUCTIONS)
> - * sizeof(struct
> tgsi_full_instruction));
> - if (!instructions) {
> - break;
> - }
> - bld.instructions = instructions;
> - bld.max_instructions += LP_MAX_INSTRUCTIONS;
> - }
> -
> - memcpy(bld.instructions + num_instructions,
> - &parse.FullToken.FullInstruction,
> - sizeof(bld.instructions[0]));
> -
> - num_instructions++;
> - }
> -
> - break;
> -
> - case TGSI_TOKEN_TYPE_IMMEDIATE:
> - /* simply copy the immediate values into the next
> immediates[] slot */
> - {
> - const uint size =
> parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
> - assert(size <= 4);
> - assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
> - for( i = 0; i < size; ++i )
> - bld.immediates[num_immediates][i] =
> - lp_build_const_vec(gallivm, type,
> parse.FullToken.FullImmediate.u[i].Float);
> - for( i = size; i < 4; ++i )
> - bld.immediates[num_immediates][i] = bld.base.undef;
> - num_immediates++;
> - }
> - break;
> -
> - case TGSI_TOKEN_TYPE_PROPERTY:
> - break;
> -
> - default:
> - assert( 0 );
> - }
> - }
> -
> - while (pc != -1) {
> - struct tgsi_full_instruction *instr = bld.instructions + pc;
> - const struct tgsi_opcode_info *opcode_info =
> - tgsi_get_opcode_info(instr->Instruction.Opcode);
> - if (!emit_instruction( &bld, instr, opcode_info, &pc ))
> - _debug_printf("warning: failed to translate tgsi opcode %s
> to LLVM\n",
> - opcode_info->mnemonic);
> - }
> -
> - /* If we have indirect addressing in outputs we need to copy our
> alloca array
> - * to the outputs slots specified by the called */
> - if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> - unsigned index, chan;
> - assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] +
> 1);
> - for (index = 0; index < info->num_outputs; ++index) {
> - for (chan = 0; chan < NUM_CHANNELS; ++chan) {
> - bld.outputs[index][chan] = get_output_ptr(&bld, index,
> chan);
> - }
> - }
> - }
> + lp_build_tgsi_llvm(&bld.bld_base, tokens);
>
> if (0) {
> LLVMBasicBlockRef block =
> LLVMGetInsertBlock(gallivm->builder);
> @@ -2504,7 +1820,6 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
> lp_debug_dump_value(function);
> debug_printf("2222222222222222222222222222 \n");
> }
> - tgsi_parse_free( &parse );
>
> if (0) {
> LLVMModuleRef module = LLVMGetGlobalParent(
> @@ -2512,8 +1827,6 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
> LLVMDumpModule(module);
>
> }
> -
> - FREE( bld.instructions );
> }
>
>
> --
> 1.7.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
More information about the mesa-dev
mailing list