[Mesa-dev] [PATCH 6/6] gallivm: Add a new interface for doing TGSI->LLVM conversions

Jose Fonseca jfonseca at vmware.com
Mon Jan 16 11:27:55 PST 2012


Tom,

Looks good in principle!

But I need to test this myself before I can be comfortable with merging it into master.

Just a few quick comments inline.

Jose

----- Original Message -----
> From: Tom Stellard <thomas.stellard at amd.com>
> 
> lp_bld_tgsi_soa.c has been adapted to use this new interface, but
> lp_bld_tgsi_aos.c has only been partially adapted, since nothing in
> gallium currently uses it.

There are some closed source users of lp_bld_tgsi_aos.c. The rationale for open-sourcing it was to eventually enable using AoS in draw_llvm (as vertex shaders are usually more suitable for AoS), but that has not happened yet.

Will lp_bld_tgsi_aos.c run correctly with this?

If it is not useful for the r600g driver, then it might be better to leave lp_bld_tgsi_aos.c untouched for the time being. (I can do the conversion when I merge this into our private repositories).

> ---
>  src/gallium/auxiliary/Makefile.sources          |    2 +
>  src/gallium/auxiliary/gallivm/lp_bld_action.c   | 1182
>  ++++++++++++++
>  src/gallium/auxiliary/gallivm/lp_bld_action.h   |  138 ++
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.c     |  409 +++++
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.h     |  341 ++++-
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |  551 +++----
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 1981
>  ++++++++---------------
>  7 files changed, 2952 insertions(+), 1652 deletions(-)
>  create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_action.c
>  create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_action.h
>  create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> 
> diff --git a/src/gallium/auxiliary/Makefile.sources
> b/src/gallium/auxiliary/Makefile.sources
> index f55a4eb..547f63d 100644
> --- a/src/gallium/auxiliary/Makefile.sources
> +++ b/src/gallium/auxiliary/Makefile.sources
> @@ -155,6 +155,7 @@ GENERATED_SOURCES := \
>  	util/u_half.c
>  
>  GALLIVM_SOURCES := \
> +        gallivm/lp_bld_action.c \
>          gallivm/lp_bld_arit.c \
>          gallivm/lp_bld_assert.c \
>          gallivm/lp_bld_bitarit.c \
> @@ -176,6 +177,7 @@ GALLIVM_SOURCES := \
>          gallivm/lp_bld_sample_soa.c \
>          gallivm/lp_bld_struct.c \
>          gallivm/lp_bld_swizzle.c \
> +	gallivm/lp_bld_tgsi.c \
>          gallivm/lp_bld_tgsi_aos.c \
>          gallivm/lp_bld_tgsi_info.c \
>          gallivm/lp_bld_tgsi_soa.c \
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_action.c
> b/src/gallium/auxiliary/gallivm/lp_bld_action.c
> new file mode 100644
> index 0000000..0b6cc77
> --- /dev/null
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_action.c

I'd prefer a source name that clearly states it is TGSI-related, for example lp_bld_tgsi_soa_action.c.

> @@ -0,0 +1,1182 @@
> +/**************************************************************************
> + *
> + * Copyright 2010-2011 Advanced Micro Devices, Inc.
> + * Copyright 2009 VMware, Inc.
> + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction,
> including
> + * without limitation the rights to use, copy, modify, merge,
> publish,
> + * distribute, sub license, and/or sell copies of the Software, and
> to
> + * permit persons to whom the Software is furnished to do so,
> subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the
> + * next paragraph) shall be included in all copies or substantial
> portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> NON-INFRINGEMENT.
> + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE
> LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
> CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> +
> **************************************************************************/
> +
> +/**
> + * @file
> + * TGSI to LLVM IR translation.
> + *
> + * @author Jose Fonseca <jfonseca at vmware.com>
> + * @author Tom Stellard <thomas.stellard at amd.com>
> + *
> + * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
> + * Brian Paul, and others.
> + */
> +
> +
> +#include "lp_bld_action.h"
> +
> +#include "lp_bld_tgsi.h"
> +#include "lp_bld_arit.h"
> +#include "lp_bld_const.h"
> +#include "lp_bld_gather.h"
> +#include "lp_bld_logic.h"
> +
> +#include "tgsi/tgsi_exec.h"
> +
> +/* XXX: The CPU only defaults should be replaced by generic ones.  In
> most
> + * cases, the CPU defaults are just wrappers around a function in
> + * lp_build_arit.c and these functions should be inlined here and
> the CPU
> + * generic code should be removed and placed elsewhere.
> + */
> +
> +/* Default actions */
> +
> +/* Generic fetch_arg functions */
> +
> +static void scalar_unary_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* src0.x */
> +   emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst, 0, 0);
> +   emit_data->arg_count = 1;
> +   emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
> +}
> +
> +static void scalar_binary_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* src0.x */
> +   emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            0, TGSI_CHAN_X);
> +   /* src1.x */
> +   emit_data->args[1] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            1, TGSI_CHAN_X);
> +   emit_data->arg_count = 2;
> +   emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_ADD */
> +static void
> +add_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] = LLVMBuildFAdd(
> +                                bld_base->base.gallivm->builder,
> +                                emit_data->args[0],
> emit_data->args[1], "");
> +}
> +
> +/* TGSI_OPCODE_ARR */
> +static void
> +arr_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_unary(bld_base,
> +                                         TGSI_OPCODE_ROUND,
> emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_CLAMP */
> +static void
> +clamp_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp;
> +   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
> +                                   emit_data->args[0],
> +                                   emit_data->args[1]);
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> +                                       TGSI_OPCODE_MIN, tmp,
> emit_data->args[2]);
> +}
> +
> +/* DP* Helper */
> +
> +static void
> +dp_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data,
> +   unsigned dp_components)
> +{
> +   unsigned chan, src;
> +   for (src = 0; src < 2; src++) {
> +      for (chan = 0; chan < dp_components; chan++) {
> +         emit_data->args[(src * dp_components) + chan] =
> +                     lp_build_emit_fetch(bld_base, emit_data->inst,
> src, chan);
> +      }
> +   }
> +   emit_data->dst_type = bld_base->base.elem_type;
> +}
> +
> +/* TGSI_OPCODE_DP2 */
> +static void
> +dp2_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   dp_fetch_args(bld_base, emit_data, 2);
> +}
> +
> +static void
> +dp2_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp0, tmp1;
> +   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[0] /* src0.x */,
> +                                    emit_data->args[2] /* src1.x
> */);
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[1] /* src0.y */,
> +                                    emit_data->args[3] /* src1.y
> */);
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> +                                                    TGSI_OPCODE_ADD,
> tmp0, tmp1);
> +}
> +
> +static struct lp_build_opcode_action dp2_action = {
> +   .fetch_args = dp2_fetch_args,
> +   .emit = dp2_emit
> +};
> +
> +/* TGSI_OPCODE_DP2A */
> +static void
> +dp2a_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   dp_fetch_args(bld_base, emit_data, 2);
> +   emit_data->args[5] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            2, TGSI_CHAN_X);
> +}
> +
> +static void
> +dp2a_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp;
> +   tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
> +                                    emit_data->args[5], tmp);
> +}
> +
> +static struct lp_build_opcode_action dp2a_action = {
> +   .fetch_args = dp2a_fetch_args,
> +   .emit = dp2a_emit
> +};
> +
> +/* TGSI_OPCODE_DP3 */
> +static void
> +dp3_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   dp_fetch_args(bld_base, emit_data, 3);
> +}
> +
> +static void
> +dp3_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp0, tmp1;
> +   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[0] /* src0.x */,
> +                                    emit_data->args[3] /* src1.x
> */);
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[1] /* src0.y */,
> +                                    emit_data->args[4] /* src1.y
> */);
> +   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1,
> tmp0);
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[2] /* src0.z */,
> +                                    emit_data->args[5] /* src1.z
> */);
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> +                                                    TGSI_OPCODE_ADD,
> tmp0, tmp1);
> +}
> +
> +static struct lp_build_opcode_action dp3_action = {
> +   .fetch_args = dp3_fetch_args,
> +   .emit = dp3_emit
> +};
> +
> +/* TGSI_OPCODE_DP4 */
> +
> +static void
> +dp4_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   dp_fetch_args(bld_base, emit_data, 4);
> +}
> +
> +static void
> +dp4_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp0, tmp1;
> +   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[0] /* src0.x */,
> +                                    emit_data->args[4] /* src1.x
> */);
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[1] /* src0.y */,
> +                                    emit_data->args[5] /* src1.y
> */);
> +   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0,
> tmp1);
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[2] /* src0.z */,
> +                                    emit_data->args[6] /* src1.z
> */);
> +   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0,
> tmp1);
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                    emit_data->args[3] /* src0.w */,
> +                                    emit_data->args[7] /* src1.w
> */);
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> +                                                    TGSI_OPCODE_ADD,
> tmp0, tmp1);
> +}
> +
> +static struct lp_build_opcode_action dp4_action = {
> +   .fetch_args = dp4_fetch_args,
> +   .emit = dp4_emit
> +};
> +
> +/* TGSI_OPCODE_DPH */
> +static void
> +dph_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   dp_fetch_args(bld_base, emit_data, 4);
> +   /* src0.w */
> +   emit_data->args[3] = bld_base->base.one;
> +}
> +
> +const struct lp_build_opcode_action dph_action = {
> +   .fetch_args = dph_fetch_args,
> +   .emit = dp4_emit
> +};
> +
> +/* TGSI_OPCODE_DST */
> +static void
> +dst_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* src0.y */
> +   emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            0, TGSI_CHAN_Y);
> +   /* src0.z */
> +   emit_data->args[1] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            0, TGSI_CHAN_Z);
> +   /* src1.y */
> +   emit_data->args[2] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            1, TGSI_CHAN_Y);
> +   /* src1.w */
> +   emit_data->args[3] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            1, TGSI_CHAN_W);
> +}
> +
> +static void
> +dst_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* dst.x */
> +   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
> +
> +   /* dst.y */
> +   emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> +                                          TGSI_OPCODE_MUL,
> +                                          emit_data->args[0] /*
> src0.y */,
> +                                          emit_data->args[2] /*
> src1.y */);
> +   /* dst.z */
> +   emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */
> +
> +   /* dst.w */
> +   emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */
> +}
> +
> +static struct lp_build_opcode_action dst_action = {
> +   .fetch_args = dst_fetch_args,
> +   .emit = dst_emit
> +};
> +
> +/* TGSI_OPCODE_END */
> +static void
> +end_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   bld_base->pc = -1;
> +}
> +
> +/* TGSI_OPCODE_EXP */
> +
> +static void
> +exp_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef floor_x;
> +
> +   /* floor( src0.x ) */
> +   floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
> +                                      emit_data->args[0]);
> +
> +   /* 2 ^ floor( src0.x ) */
> +   emit_data->output[TGSI_CHAN_X] =
> lp_build_emit_llvm_unary(bld_base,
> +                                       TGSI_OPCODE_EX2, floor_x);
> +
> +   /* src0.x - floor( src0.x ) */
> +   emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> +                   TGSI_OPCODE_SUB,  emit_data->args[0] /* src0.x
> */, floor_x);
> +
> +   /* 2 ^ src0.x */
> +   emit_data->output[TGSI_CHAN_Z] =
> lp_build_emit_llvm_unary(bld_base,
> +                             TGSI_OPCODE_EX2, emit_data->args[0] /*
> src0.x */);
> +
> +   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +const struct lp_build_opcode_action exp_action = {
> +   .fetch_args = scalar_unary_fetch_args,
> +   .emit = exp_emit
> +};
> +
> +/* TGSI_OPCODE_FRC */
> +
> +static void
> +frc_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp;
> +   tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
> +                                  emit_data->args[0]);
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> +                                       TGSI_OPCODE_SUB,
> emit_data->args[0], tmp);
> +}
> +
> +/* TGSI_OPCODE_KIL */
> +
> +static void
> +kil_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* src0.x */
> +   emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            0, TGSI_CHAN_X);
> +   /* src0.y */
> +   emit_data->args[1] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            0, TGSI_CHAN_Y);
> +   /* src0.z */
> +   emit_data->args[2] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            0, TGSI_CHAN_Z);
> +   /* src0.w */
> +   emit_data->args[3] = lp_build_emit_fetch(bld_base,
> emit_data->inst,
> +                                            0, TGSI_CHAN_W);
> +   emit_data->arg_count = 4;
> +   emit_data->dst_type =
> LLVMVoidTypeInContext(bld_base->base.gallivm->context);
> +}
> +
> +/* TGSI_OPCODE_KILP */
> +
> +static void
> +kilp_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->dst_type =
> LLVMVoidTypeInContext(bld_base->base.gallivm->context);
> +}
> +
> +/* TGSI_OPCODE_LIT */
> +
> +static void
> +lit_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* src0.x */
> +   emit_data->args[0] = lp_build_emit_fetch(bld_base,
> emit_data->inst, 0, TGSI_CHAN_X);
> +   /* src0.y */
> +   emit_data->args[1] = lp_build_emit_fetch(bld_base,
> emit_data->inst, 0, TGSI_CHAN_Y);
> +   /* src0.w */
> +   emit_data->args[2] = lp_build_emit_fetch(bld_base,
> emit_data->inst, 0, TGSI_CHAN_W);
> +   emit_data->arg_count = 3;
> +}
> +
> +static void
> +lit_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp0, tmp2;
> +
> +   /* dst.x */
> +   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
> +
> +   /* dst. y */
> +   emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> +                                               TGSI_OPCODE_MAX,
> +                                               emit_data->args[0] /*
> src0.x */,
> +                                               bld_base->base.zero);
> +
> +   /* dst.z */
> +   /* XMM[1] = SrcReg[0].yyyy */
> +   LLVMValueRef tmp1 = emit_data->args[1];
> +   /* XMM[1] = max(XMM[1], 0) */
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
> +                                    tmp1, bld_base->base.zero);
> +   /* XMM[2] = SrcReg[0].wwww */
> +   tmp2 = emit_data->args[2];
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
> +                                    tmp1, tmp2);
> +   tmp0 = emit_data->args[0];
> +   emit_data->output[TGSI_CHAN_Z] =
> lp_build_emit_llvm_ternary(bld_base,
> +                                             TGSI_OPCODE_CMP,
> +                                             tmp0,
> bld_base->base.zero, tmp1);
> +   /* dst.w */
> +   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +static struct lp_build_opcode_action lit_action = {
> +   .fetch_args = lit_fetch_args,
> +   .emit = lit_emit
> +};
> +
> +/* TGSI_OPCODE_LOG */
> +
> +static void
> +log_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +
> +   LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
> +
> +   /* abs( src0.x) */
> +   abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
> +                                    emit_data->args[0] /* src0.x
> */);
> +
> +   /* log( abs( src0.x ) ) */
> +   log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
> +                                        abs_x);
> +
> +   /* floor( log( abs( src0.x ) ) ) */
> +   flr_log_abs_x = lp_build_emit_llvm_unary(bld_base,
> TGSI_OPCODE_FLR,
> +                                            log_abs_x);
> +   /* dst.x */
> +   emit_data->output[TGSI_CHAN_X] = flr_log_abs_x;
> +
> +   /* dst.y */
> +   ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base,
> TGSI_OPCODE_EX2,
> +                                                flr_log_abs_x);
> +
> +   /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
> +   emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> +                                    TGSI_OPCODE_DIV, abs_x,
> ex2_flr_log_abs_x);
> +
> +   /* dst.z */
> +   emit_data->output[TGSI_CHAN_Z] = log_abs_x;
> +
> +   /* dst.w */
> +   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +static struct lp_build_opcode_action log_action = {
> +   .fetch_args = scalar_unary_fetch_args,
> +   .emit = log_emit
> +};
> +
> +/* TGSI_OPCODE_LRP */
> +
> +static void
> +lrp_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp;
> +   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB,
> +                                   emit_data->args[1],
> +                                   emit_data->args[2]);
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_ternary(bld_base,
> +                    TGSI_OPCODE_MAD, emit_data->args[0], tmp,
> emit_data->args[2]);
> +}
> +
> +/* TGSI_OPCODE_MAD */
> +
> +static void
> +mad_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp;
> +   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
> +                                   emit_data->args[0],
> +                                   emit_data->args[1]);
> +   emit_data->output[emit_data->chan] =
> lp_build_emit_llvm_binary(bld_base,
> +                                       TGSI_OPCODE_ADD, tmp,
> emit_data->args[2]);
> +}
> +
> +/* TGSI_OPCODE_MOV */
> +
> +static void
> +mov_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] = emit_data->args[0];
> +}
> +
> +/* TGSI_OPCODE_MUL */
> +static void
> +mul_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_mul(&bld_base->base,
> +                                   emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_POW */
> +
> +static void
> +pow_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_pow(&bld_base->base,
> +                                   emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +static struct lp_build_opcode_action pow_action = {
> +   .fetch_args = scalar_binary_fetch_args,
> +   .emit = pow_emit
> +};
> +
> +/* TGSI_OPCODE_RSQ */
> +
> +static void
> +rsq_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->args[0] = lp_build_emit_llvm_unary(bld_base,
> TGSI_OPCODE_ABS,
> +                                               emit_data->args[0]);
> +   if (bld_base->rsq_action.emit) {
> +      bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base,
> emit_data);
> +   } else {
> +      emit_data->output[emit_data->chan] = bld_base->base.undef;
> +   }
> +}
> +
> +const struct lp_build_opcode_action rsq_action = {
> +   .fetch_args = scalar_unary_fetch_args,
> +   .emit = rsq_emit
> +
> +};
> +
> +/* TGSI_OPCODE_SCS */
> +static void
> +scs_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   /* dst.x */
> +   emit_data->output[TGSI_CHAN_X] =
> lp_build_emit_llvm_unary(bld_base,
> +                                           TGSI_OPCODE_COS,
> emit_data->args[0]);
> +   /* dst.y */
> +   emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_unary(bld_base,
> +                                           TGSI_OPCODE_SIN,
> emit_data->args[0]);
> +   /* dst.z */
> +   emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
> +
> +   /* dst.w */
> +   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +const struct lp_build_opcode_action scs_action = {
> +   .fetch_args = scalar_unary_fetch_args,
> +   .emit = scs_emit
> +};
> +
> +/* TGSI_OPCODE_SFL */
> +
> +static void
> +sfl_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] = bld_base->base.zero;
> +}
> +
> +/* TGSI_OPCODE_STR */
> +
> +static void
> +str_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] = bld_base->base.one;
> +}
> +
> +/* TGSI_OPCODE_SUB */
> +static void
> +sub_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +	emit_data->output[emit_data->chan] = LLVMBuildFSub(
> +				bld_base->base.gallivm->builder,
> +				emit_data->args[0],
> +				emit_data->args[1], "");
> +}
> +
> +/* TGSI_OPCODE_XPD */
> +
> +static void
> +xpd_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   dp_fetch_args(bld_base, emit_data, 3);
> +}
> +
> +/**
> + * (a * b) - (c * d)
> + */
> +static LLVMValueRef
> +xpd_helper(
> +  struct lp_build_tgsi_context * bld_base,
> +  LLVMValueRef a,
> +  LLVMValueRef b,
> +  LLVMValueRef c,
> +  LLVMValueRef d)
> +{
> +   LLVMValueRef tmp0, tmp1;
> +
> +   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a,
> b);
> +   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c,
> d);
> +
> +   return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0,
> tmp1);
> +}
> +
> +static void
> +xpd_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
> +              emit_data->args[1] /* src0.y */, emit_data->args[5] /*
> src1.z */,
> +              emit_data->args[4] /* src1.y */, emit_data->args[2] /*
> src0.z */);
> +
> +   emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
> +              emit_data->args[2] /* src0.z */, emit_data->args[3] /*
> src1.x */,
> +              emit_data->args[5] /* src1.z */, emit_data->args[0] /*
> src0.x */);
> +
> +   emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
> +              emit_data->args[0] /* src0.x */, emit_data->args[4] /*
> src1.y */,
> +              emit_data->args[3] /* src1.x */, emit_data->args[1] /*
> src0.y */);
> +
> +   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +const struct lp_build_opcode_action xpd_action = {
> +   .fetch_args = xpd_fetch_args,
> +   .emit = xpd_emit
> +};
> +
> +void
> +lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
> +{
> +   bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
> +   bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
> +   bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
> +   bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
> +   bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
> +   bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
> +   bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
> +   bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
> +   bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
> +   bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
> +   bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
> +   bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
> +   bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
> +
> +   bld_base->op_actions[TGSI_OPCODE_COS].fetch_args =
> scalar_unary_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args =
> scalar_unary_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_IF].fetch_args =
> scalar_unary_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args =
> kil_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args =
> kilp_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args =
> scalar_unary_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args =
> scalar_unary_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args =
> scalar_unary_fetch_args;
> +
> +   bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit;
> +   bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit;
> +   bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit;
> +   bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit;
> +   bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit;
> +   bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit;
> +   bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit;
> +   bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit;
> +   bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
> +   bld_base->op_actions[TGSI_OPCODE_SFL].emit = sfl_emit;
> +   bld_base->op_actions[TGSI_OPCODE_STR].emit = str_emit;
> +   bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
> +}
> +
> +/* CPU Only default actions */
> +
> +/* These actions are CPU only, because they could potentially output
> SSE
> + * intrinsics.
> + */
> +
> +/* TGSI_OPCODE_ABS (CPU Only)*/
> +
> +static void
> +abs_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_abs(&bld_base->base,
> +
>                                                       emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_ADD (CPU Only) */
> +static void
> +add_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_add(&bld_base->base,
> +                                   emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_CEIL (CPU Only) */
> +static void
> +ceil_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_trunc(&bld_base->base,
> +
>                                                         emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_CMP (CPU Only) */
> +static void
> +cmp_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
> +                                   emit_data->args[0],
> bld_base->base.zero);
> +   emit_data->output[emit_data->chan] =
> lp_build_select(&bld_base->base,
> +                                cond, emit_data->args[1],
> emit_data->args[2]);
> +}
> +
> +/* TGSI_OPCODE_CND (CPU Only) */
> +static void
> +cnd_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef half, tmp;
> +   half = lp_build_const_vec(bld_base->base.gallivm,
> bld_base->base.type, 0.5);
> +   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_GREATER,
> +                      emit_data->args[2], half);
> +   emit_data->output[emit_data->chan] =
> lp_build_select(&bld_base->base,
> +                                          tmp,
> +                                          emit_data->args[0],
> +                                          emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_COS (CPU Only) */
> +static void
> +cos_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_cos(&bld_base->base,
> +
>                                                       emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_DIV (CPU Only) */
> +static void
> +div_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_div(&bld_base->base,
> +                                   emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_EX2 (CPU Only) */
> +static void
> +ex2_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_exp2(&bld_base->base,
> +
>                                                        emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_EXP (CPU Only) */
> +static void
> +exp_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
> +                        &emit_data->output[TGSI_CHAN_X],
> +                        &emit_data->output[TGSI_CHAN_Y],
> +                        &emit_data->output[TGSI_CHAN_Z]);
> +   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +}
> +
> +/* TGSI_OPCODE_FLR (CPU Only) */
> +
> +static void
> +flr_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_floor(&bld_base->base,
> +
>                                                         emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_LG2 (CPU Only) */
> +static void
> +lg2_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_log2(&bld_base->base,
> +
>                                                        emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_LOG (CPU Only) */
> +static void
> +log_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef p_floor_log2;
> +   LLVMValueRef p_exp;
> +   LLVMValueRef p_log2;
> +   LLVMValueRef src0 = emit_data->args[0];
> +
> +   lp_build_log2_approx(&bld_base->base, src0,
> +                        &p_exp, &p_floor_log2, &p_log2);
> +
> +   emit_data->output[TGSI_CHAN_X] = p_floor_log2;
> +
> +   emit_data->output[TGSI_CHAN_Y] =
> lp_build_emit_llvm_binary(bld_base,
> +                                             TGSI_OPCODE_DIV,
> +                                             src0, p_exp);
> +   emit_data->output[TGSI_CHAN_Z] = p_log2;
> +
> +   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
> +
> +}
> +
> +/* TGSI_OPCODE_MAX (CPU Only) */
> +
> +static void
> +max_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_max(&bld_base->base,
> +                                   emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_MIN (CPU Only) */
> +static void
> +min_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_min(&bld_base->base,
> +                                   emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_POW (CPU Only) */
> +static void
> +pow_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_pow(&bld_base->base,
> +                                   emit_data->args[0],
> emit_data->args[1]);
> +}
> +
> +
> +/* TGSI_OPCODE_RCP (CPU Only) */
> +
> +static void
> +rcp_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_rcp(&bld_base->base,
> +
>                                                       emit_data->args[0]);
> +}
> +
> +/* Reciprocal square root (CPU Only) */
> +
> +/* This is not the same as TGSI_OPCODE_RSQ, which requires the
> argument to be
> + * greater than or equal to 0 */
> +static void
> +recip_sqrt_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_rsqrt(&bld_base->base,
> +
>                                                         emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_ROUND (CPU Only) */
> +static void
> +round_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_round(&bld_base->base,
> +
>                                                         emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_SET Helper (CPU Only) */
> +
> +static void
> +set_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data,
> +   unsigned pipe_func)
> +{
> +   LLVMValueRef cond = lp_build_cmp(&bld_base->base, pipe_func,
> +                                    emit_data->args[0],
> emit_data->args[1]);
> +   emit_data->output[emit_data->chan] =
> lp_build_select(&bld_base->base,
> +                                          cond,
> +                                          bld_base->base.one,
> +                                          bld_base->base.zero);
> +}
> +
> +/* TGSI_OPCODE_SEQ (CPU Only) */
> +
> +static void
> +seq_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
> +}
> +
> +/* TGSI_OPCODE_SGE (CPU Only) */
> +static void
> +sge_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
> +}
> +
> +/* TGSI_OPCODE_SGT (CPU Only)*/
> +
> +static void
> +sgt_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
> +}
> +
> +/* TGSI_OPCODE_SIN (CPU Only) */
> +static void
> +sin_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_sin(&bld_base->base,
> +
>                                                       emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_SLE (CPU Only) */
> +static void
> +sle_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
> +}
> +
> +/* TGSI_OPCODE_SLT (CPU Only) */
> +
> +static void
> +slt_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
> +}
> +
> +/* TGSI_OPCODE_SNE (CPU Only) */
> +
> +static void
> +sne_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
> +}
> +
> +/* TGSI_OPCODE_SSG (CPU Only) */
> +
> +static void
> +ssg_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_sgn(&bld_base->base,
> +
>                                                       emit_data->args[0]);
> +}
> +
> +/* TGSI_OPCODE_SUB (CPU Only) */
> +
> +static void
> +sub_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_sub(&bld_base->base,
> +
>                                                        emit_data->args[0],
> +
>                                                        emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_TRUNC (CPU Only) */
> +
> +static void
> +trunc_emit_cpu(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] =
> lp_build_trunc(&bld_base->base,
> +
>                                                         emit_data->args[0]);
> +}
> +
> +void
> +lp_set_default_actions_cpu(
> +   struct lp_build_tgsi_context * bld_base)
> +{
> +   lp_set_default_actions(bld_base);
> +   bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_ARL].emit = flr_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_CND].emit = cnd_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu;
> +   bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
> +
> +   bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
> +}
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_action.h
> b/src/gallium/auxiliary/gallivm/lp_bld_action.h
> new file mode 100644
> index 0000000..5495a86
> --- /dev/null
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_action.h
> @@ -0,0 +1,138 @@
> +/*
> + * Copyright 2010-2011 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction,
> including
> + * without limitation the rights to use, copy, modify, merge,
> publish,
> + * distribute, sub license, and/or sell copies of the Software, and
> to
> + * permit persons to whom the Software is furnished to do so,
> subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the
> + * next paragraph) shall be included in all copies or substantial
> portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> NON-INFRINGEMENT.
> + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE
> LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
> CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> +
> **************************************************************************/
> +
> +/**
> + *
> + * @author Tom Stellard <thomas.stellard at amd.com>
> + *
> + */
> +
> +
> +#ifndef LP_BLD_ACTION_H
> +#define LP_BLD_ACTION_H
> +
> +#include <llvm-c/Core.h>
> +
> +struct lp_build_tgsi_context;
> +
> +struct lp_build_emit_data {
> +   /** Arguments that are passed to lp_build_opcode_action::emit.
>  The
> +    * order of the arguments should be as follows:
> +    * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x,
> s2.y, s2.z, s2.w
> +    * AOS: s0.xyzw, s1.xyzw, s2.xyzw
> +    * TEXTURE Instructions: coord.xyzw
> +    *
> +    * Arguments should be packed into the args array.  For example,
> an SOA
> +    * instruction that reads s0.x and s1.x args should look like
> this:
> +    * args[0] = s0.x;
> +    * args[1] = s1.x;
> +    */
> +   LLVMValueRef args[12];
> +
> +   /**
> +    * Number of arguments in the args array.
> +    */
> +   unsigned arg_count;
> +
> +   /**
> +    * The output type of the opcode.  This should be set in the
> +    * lp_build_opcode_action::fetch_args function.
> +    */
> +   LLVMTypeRef dst_type;
> +
> +   /** This is used by the lp_build_opcode_action::fetch_args
> function to
> +    * determine which channel to read from the opcode arguments.  It
> also
> +    * specifies which index of the output array should be written to
> by
> +    * the lp_build_opcode_action::emit function.  However, this
> value is
> +    * usually ignored by any opcodes that are not
> TGSI_OUTPUT_COMPONENTWISE.
> +    */
> +   unsigned chan;
> +
> +   /** The lp_build_opcode_action::emit 'executes' the opcode and
> writes the
> +    * results to this array.
> +    */
> +   LLVMValueRef output[4];
> +
> +   /**
> +    * The current instruction that is being 'executed'.
> +    */
> +   const struct tgsi_full_instruction * inst;
> +   const struct tgsi_opcode_info * info;
> +};
> +
> +struct lp_build_opcode_action
> +{
> +
> +   /**
> +    * This function is responsible for doing 2-3 things:
> +    * 1. Fetching the instruction arguments into the emit_data->args
> array.
> +    * 2. Setting the number of arguments in emit_data->arg_count.
> +    * 3. Setting the destination type in emit_data->dst_type
> (usually only
> +    *    necessary for opcodes that are TGSI_OUTPUT_COMPONENTWISE).
> +    */
> +   void (*fetch_args)(struct lp_build_tgsi_context *,
> +                      struct lp_build_emit_data *);
> +
> +
> +   /**
> +    * This function is responsible for emitting LLVM IR for a TGSI
> opcode.
> +    * It should store the values it generates in the
> emit_data->output array
> +    * and for TGSI_OUTPUT_COMPONENTWISE and TGSI_OUTPUT_REPLICATE
> instructions
> +    * (and possibly others depending on the specific
> implementation), it should
> +    * make sure to store the values in the array slot indexed by
> emit_data->chan.
> +    */
> +   void (*emit)(const struct lp_build_opcode_action *,
> +                        struct lp_build_tgsi_context *,
> +                        struct lp_build_emit_data *);
> +
> +   /**
> +    * This variable can be used to store an intrinsic name, in case
> the TGSI
> +    * opcode will be replaced by a target specific intrinsic.
>  (There is a
> +    * convenience function in lp_bld_tgsi.c called
> lp_build_tgsi_intrinsic()
> +    * that can be assigned to lp_build_opcode_action::emit and used
> for
> +    * generating intrinsics).
> +    */
> +   const char * intr_name;
> +};
> +
> +/**
> + * This function initializes the bld_base->op_actions array with
> some
> + * generic operand actions.
> + */
> +void
> +lp_set_default_actions(
> +   struct lp_build_tgsi_context * bld_base);
> +
> +/*
> + * This function initializes the bld_base->op_actions array with some
> + * operand actions that are intended only for use when generating
> + * instructions to be executed on a CPU.
> + */
> +void
> +lp_set_default_actions_cpu(
> +   struct lp_build_tgsi_context * bld_base);
> +
> +#endif /* LP_BLD_ACTION_H */
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> new file mode 100644
> index 0000000..9ec4a9b
> --- /dev/null
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -0,0 +1,409 @@
> +/**************************************************************************
> + *
> + * Copyright 2010-2011 Advanced Micro Devices, Inc.
> + * Copyright 2010 VMware, Inc.
> + * Copyright 2009 VMware, Inc.
> + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction,
> including
> + * without limitation the rights to use, copy, modify, merge,
> publish,
> + * distribute, sub license, and/or sell copies of the Software, and
> to
> + * permit persons to whom the Software is furnished to do so,
> subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the
> + * next paragraph) shall be included in all copies or substantial
> portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> NON-INFRINGEMENT.
> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
> CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> +
> **************************************************************************/
> +
> +#include "gallivm/lp_bld_tgsi.h"
> +
> +#include "gallivm/lp_bld_arit.h"
> +#include "gallivm/lp_bld_gather.h"
> +#include "gallivm/lp_bld_init.h"
> +#include "gallivm/lp_bld_intr.h"
> +#include "tgsi/tgsi_info.h"
> +#include "tgsi/tgsi_parse.h"
> +#include "tgsi/tgsi_util.h"
> +#include "util/u_memory.h"
> +
> +/* The user is responsible for freeing bld_base->instructions */
> +unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context *
> bld_base)
> +{
> +   bld_base->instructions = (struct tgsi_full_instruction *)
> +         MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct
> tgsi_full_instruction) );
> +   if (!bld_base->instructions) {
> +      return 0;
> +   }
> +   bld_base->max_instructions = LP_MAX_INSTRUCTIONS;
> +   return 1;
> +}
> +
> +
> +unsigned lp_bld_tgsi_add_instruction(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct tgsi_full_instruction *inst_to_add)
> +{
> +
> +   if (bld_base->num_instructions == bld_base->max_instructions) {
> +      struct tgsi_full_instruction *instructions;
> +      instructions = REALLOC(bld_base->instructions,
> bld_base->max_instructions
> +                                      * sizeof(struct
> tgsi_full_instruction),
> +                                      (bld_base->max_instructions +
> LP_MAX_INSTRUCTIONS)
> +                                      * sizeof(struct
> tgsi_full_instruction));
> +      if (!instructions) {
> +         return 0;
> +      }
> +      bld_base->instructions = instructions;
> +      bld_base->max_instructions += LP_MAX_INSTRUCTIONS;
> +   }
> +   memcpy(bld_base->instructions + bld_base->num_instructions,
> inst_to_add,
> +          sizeof(bld_base->instructions[0]));
> +
> +   bld_base->num_instructions++;
> +
> +   return 1;
> +}
> +
> +
> +/**
> + * This function assumes that all the args in emit_data have been
> set.
> + */
> +static void
> +lp_build_action_set_dst_type(
> +   struct lp_build_emit_data * emit_data,
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode)
> +{
> +   if (emit_data->arg_count == 0) {
> +      emit_data->dst_type =
> LLVMVoidTypeInContext(bld_base->base.gallivm->context);
> +   } else {
> +      /* XXX: Not all opcodes have the same src and dst types. */
> +      emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
> +   }
> +}
> +
> +void
> +lp_build_tgsi_intrinsic(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_context * base = &bld_base->base;
> +   emit_data->output[emit_data->chan] = lp_build_intrinsic(
> +               base->gallivm->builder, action->intr_name,
> +               emit_data->dst_type, emit_data->args,
> emit_data->arg_count);
> +}
> +
> +LLVMValueRef
> +lp_build_emit_llvm(
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_opcode_action * action =
> &bld_base->op_actions[tgsi_opcode];
> +   /* XXX: Assert that this is a componentwise or replicate
> instruction */
> +
> +   lp_build_action_set_dst_type(emit_data, bld_base, tgsi_opcode);
> +   emit_data->chan = 0;
> +   assert(action->emit);
> +   action->emit(action, bld_base, emit_data);
> +   return emit_data->output[0];
> +}
> +
> +LLVMValueRef
> +lp_build_emit_llvm_unary(
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode,
> +   LLVMValueRef arg0)
> +{
> +   struct lp_build_emit_data emit_data;
> +   emit_data.arg_count = 1;
> +   emit_data.args[0] = arg0;
> +   return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
> +}
> +
> +LLVMValueRef
> +lp_build_emit_llvm_binary(
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode,
> +   LLVMValueRef arg0,
> +   LLVMValueRef arg1)
> +{
> +   struct lp_build_emit_data emit_data;
> +   emit_data.arg_count = 2;
> +   emit_data.args[0] = arg0;
> +   emit_data.args[1] = arg1;
> +   return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
> +}
> +
> +LLVMValueRef
> +lp_build_emit_llvm_ternary(
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode,
> +   LLVMValueRef arg0,
> +   LLVMValueRef arg1,
> +   LLVMValueRef arg2)
> +{
> +   struct lp_build_emit_data emit_data;
> +   emit_data.arg_count = 3;
> +   emit_data.args[0] = arg0;
> +   emit_data.args[1] = arg1;
> +   emit_data.args[2] = arg2;
> +   return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
> +}
> +
> +/**
> + * The default fetch implementation.
> + */
> +void lp_build_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   unsigned src;
> +   for (src = 0; src < emit_data->info->num_src; src++) {
> +      emit_data->args[src] = lp_build_emit_fetch(bld_base,
> emit_data->inst, src,
> +                                               emit_data->chan);
> +   }
> +   emit_data->arg_count = emit_data->info->num_src;
> +   lp_build_action_set_dst_type(emit_data, bld_base,
> +		emit_data->inst->Instruction.Opcode);
> +}
> +
> +/* XXX: COMMENT
> + * It should be assumed that this function ignores writemasks
> + */
> +boolean
> +lp_build_tgsi_inst_llvm(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_instruction * inst)
> +{
> +   unsigned tgsi_opcode = inst->Instruction.Opcode;
> +   const struct tgsi_opcode_info * info =
> tgsi_get_opcode_info(tgsi_opcode);
> +   const struct lp_build_opcode_action * action =
> +
>                                         &bld_base->op_actions[tgsi_opcode];
> +   struct lp_build_emit_data emit_data;
> +   unsigned chan_index;
> +   LLVMValueRef val;
> +
> +   bld_base->pc++;
> +
> +   /* Ignore deprecated instructions */
> +   switch (inst->Instruction.Opcode) {
> +
> +   case TGSI_OPCODE_RCC:
> +   case TGSI_OPCODE_UP2H:
> +   case TGSI_OPCODE_UP2US:
> +   case TGSI_OPCODE_UP4B:
> +   case TGSI_OPCODE_UP4UB:
> +   case TGSI_OPCODE_X2D:
> +   case TGSI_OPCODE_ARA:
> +   case TGSI_OPCODE_BRA:
> +   case TGSI_OPCODE_DIV:
> +   case TGSI_OPCODE_PUSHA:
> +   case TGSI_OPCODE_POPA:
> +   case TGSI_OPCODE_I2F:
> +   case TGSI_OPCODE_NOT:
> +   case TGSI_OPCODE_SHL:
> +   case TGSI_OPCODE_ISHR:
> +   case TGSI_OPCODE_AND:
> +   case TGSI_OPCODE_OR:
> +   case TGSI_OPCODE_MOD:
> +   case TGSI_OPCODE_XOR:
> +   case TGSI_OPCODE_SAD:
> +   case TGSI_OPCODE_TXF:
> +   case TGSI_OPCODE_TXQ:
> +      /* deprecated? */
> +      assert(0);
> +      return FALSE;
> +      break;
> +   }
> +
> +   /* Check if the opcode has been implemented */
> +   if (!action->emit) {
> +      return FALSE;
> +   }
> +
> +   memset(&emit_data, 0, sizeof(emit_data));
> +
> +   assert(info->num_dst <= 1);
> +   if (info->num_dst) {
> +      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> +         emit_data.output[chan_index] = bld_base->base.undef;
> +      }
> +   }
> +
> +   emit_data.inst = inst;
> +   emit_data.info = info;
> +
> +   /* Emit the instructions */
> +   if (info->output_type == TGSI_OUTPUT_COMPONENTWISE &&
> bld_base->soa) {
> +      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
> +         emit_data.chan = chan_index;
> +         if (!action->fetch_args) {
> +            lp_build_fetch_args(bld_base, &emit_data);
> +         } else {
> +             action->fetch_args(bld_base, &emit_data);
> +         }
> +         action->emit(action, bld_base, &emit_data);
> +      }
> +   } else {
> +      emit_data.chan = LP_CHAN_ALL;
> +      if (action->fetch_args) {
> +         action->fetch_args(bld_base, &emit_data);
> +      }
> +      /* Make sure the output value is stored in
> emit_data.output[0], unless
> +       * the opcode is channel dependent */
> +      if (info->output_type != TGSI_OUTPUT_CHAN_DEPENDENT) {
> +         emit_data.chan = 0;
> +      }
> +      action->emit(action, bld_base, &emit_data);
> +
> +      /* Replicate the output values */
> +      if (info->output_type == TGSI_OUTPUT_REPLICATE &&
> bld_base->soa) {
> +         val = emit_data.output[0];
> +         memset(emit_data.output, 0, sizeof(emit_data.output));
> +         TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
> +            emit_data.output[chan_index] = val;
> +         }
> +      }
> +   }
> +
> +   if (info->num_dst > 0) {
> +      bld_base->emit_store(bld_base, inst, info, emit_data.output);
> +   }
> +   return TRUE;
> +}
> +
> +
> +LLVMValueRef
> +lp_build_emit_fetch(
> +   struct lp_build_tgsi_context *bld_base,
> +   const struct tgsi_full_instruction *inst,
> +   unsigned src_op,
> +   const unsigned chan_index)
> +{
> +   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
> +   unsigned swizzle;
> +   LLVMValueRef res;
> +
> +   if (chan_index == LP_CHAN_ALL) {
> +      swizzle = ~0;
> +   } else {
> +      swizzle = tgsi_util_get_full_src_register_swizzle(reg,
> chan_index);
> +      if (swizzle > 3) {
> +         assert(0 && "invalid swizzle in emit_fetch()");
> +         return bld_base->base.undef;
> +      }
> +   }
> +
> +   assert(reg->Register.Index <=
> bld_base->info->file_max[reg->Register.File]);
> +
> +   if (bld_base->emit_fetch_funcs[reg->Register.File]) {
> +      res = bld_base->emit_fetch_funcs[reg->Register.File](bld_base,
> reg,
> +                                                           swizzle);
> +   } else {
> +      assert(0 && "invalid src register in emit_fetch()");
> +      return bld_base->base.undef;
> +   }
> +
> +   if (reg->Register.Absolute) {
> +      res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
> res);
> +   }
> +
> +   if (reg->Register.Negate) {
> +      res = lp_build_negate( &bld_base->base, res );
> +   }
> +
> +   /*
> +    * Swizzle the argument
> +    */
> +
> +   if (swizzle == ~0) {
> +      res = bld_base->emit_swizzle(bld_base, res,
> +                     reg->Register.SwizzleX,
> +                     reg->Register.SwizzleY,
> +                     reg->Register.SwizzleZ,
> +                     reg->Register.SwizzleW);
> +   }
> +
> +   return res;
> +
> +}
> +
> +boolean
> +lp_build_tgsi_llvm(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_token *tokens)
> +{
> +   struct tgsi_parse_context parse;
> +
> +   if (bld_base->emit_prologue) {
> +      bld_base->emit_prologue(bld_base);
> +   }
> +
> +   if (!lp_bld_tgsi_list_init(bld_base)) {
> +      return FALSE;
> +   }
> +
> +   tgsi_parse_init( &parse, tokens );
> +
> +   while( !tgsi_parse_end_of_tokens( &parse ) ) {
> +      tgsi_parse_token( &parse );
> +
> +      switch( parse.FullToken.Token.Type ) {
> +      case TGSI_TOKEN_TYPE_DECLARATION:
> +         /* Inputs already interpolated */
> +         bld_base->emit_declaration(bld_base,
> &parse.FullToken.FullDeclaration);
> +         break;
> +
> +      case TGSI_TOKEN_TYPE_INSTRUCTION:
> +         lp_bld_tgsi_add_instruction(bld_base,
> &parse.FullToken.FullInstruction);
> +         break;
> +
> +      case TGSI_TOKEN_TYPE_IMMEDIATE:
> +         bld_base->emit_immediate(bld_base,
> &parse.FullToken.FullImmediate);
> +         break;
> +
> +      case TGSI_TOKEN_TYPE_PROPERTY:
> +         break;
> +
> +      default:
> +         assert( 0 );
> +      }
> +   }
> +
> +   while (bld_base->pc != -1) {
> +      struct tgsi_full_instruction *instr = bld_base->instructions +
> +							bld_base->pc;
> +      const struct tgsi_opcode_info *opcode_info =
> +         tgsi_get_opcode_info(instr->Instruction.Opcode);
> +      if (!lp_build_tgsi_inst_llvm(bld_base, instr)) {
> +         _debug_printf("warning: failed to translate tgsi opcode %s
> to LLVM\n",
> +                       opcode_info->mnemonic);
> +         return FALSE;
> +      }
> +   }
> +
> +   tgsi_parse_free(&parse);
> +
> +   FREE(bld_base->instructions);
> +
> +   if (bld_base->emit_epilogue) {
> +      bld_base->emit_epilogue(bld_base);
> +   }
> +
> +   return TRUE;
> +}
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index 9713d10..3ca6719 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -1,5 +1,6 @@
>  /**************************************************************************
>   *
> + * Copyright 2010-2011 Advanced Micro Devices, Inc.
>   * Copyright 2009 VMware, Inc.
>   * All Rights Reserved.
>   *
> @@ -30,21 +31,33 @@
>   * TGSI to LLVM IR translation.
>   *
>   * @author Jose Fonseca <jfonseca at vmware.com>
> + * @author Tom Stellard <thomas.stellard at amd.com>
>   */
>  
>  #ifndef LP_BLD_TGSI_H
>  #define LP_BLD_TGSI_H
>  
>  #include "gallivm/lp_bld.h"
> +#include "gallivm/lp_bld_action.h"
> +#include "gallivm/lp_bld_limits.h"
> +#include "lp_bld_type.h"
>  #include "pipe/p_compiler.h"
>  #include "pipe/p_state.h"
> +#include "tgsi/tgsi_exec.h"
>  #include "tgsi/tgsi_scan.h"
>  
>  
> +#define LP_CHAN_ALL ~0
> +
> +#define LP_MAX_INSTRUCTIONS 256
> +
> +struct tgsi_full_declaration;
> +struct tgsi_full_immediate;
> +struct tgsi_full_instruction;
> +struct tgsi_full_src_register;
> +struct tgsi_opcode_info;
>  struct tgsi_token;
>  struct tgsi_shader_info;
> -struct lp_type;
> -struct lp_build_context;
>  struct lp_build_mask_context;
>  struct gallivm_state;
>  
> @@ -207,4 +220,328 @@ lp_build_system_values_array(struct
> gallivm_state *gallivm,
>                               LLVMValueRef facing);
>  
>  
> +struct lp_exec_mask {
> +   struct lp_build_context *bld;
> +
> +   boolean has_mask;
> +
> +   LLVMTypeRef int_vec_type;
> +
> +   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
> +   int cond_stack_size;
> +   LLVMValueRef cond_mask;
> +
> +   LLVMBasicBlockRef loop_block;
> +   LLVMValueRef cont_mask;
> +   LLVMValueRef break_mask;
> +   LLVMValueRef break_var;
> +   struct {
> +      LLVMBasicBlockRef loop_block;
> +      LLVMValueRef cont_mask;
> +      LLVMValueRef break_mask;
> +      LLVMValueRef break_var;
> +   } loop_stack[LP_MAX_TGSI_NESTING];
> +   int loop_stack_size;
> +
> +   LLVMValueRef ret_mask;
> +   struct {
> +      int pc;
> +      LLVMValueRef ret_mask;
> +   } call_stack[LP_MAX_TGSI_NESTING];
> +   int call_stack_size;
> +
> +   LLVMValueRef exec_mask;
> +};
> +
> +struct lp_build_tgsi_inst_list
> +{
> +   struct tgsi_full_instruction *instructions;
> +   uint max_instructions;
> +   uint num_instructions;
> +};
> +
> +unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context *
> bld_base);
> +
> +
> +unsigned lp_bld_tgsi_add_instruction(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct tgsi_full_instruction *inst_to_add);
> +
> +
> +struct lp_build_tgsi_context;
> +
> +
> +typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct
> lp_build_tgsi_context *,
> +                                        const struct
> tgsi_full_src_register *,
> +                                        unsigned);
> +
> +struct lp_build_tgsi_context
> +{
> +   struct lp_build_context base;
> +
> +   /** This array stores functions that are used to transform TGSI
> opcodes to
> +     * LLVM instructions.
> +     */
> +   struct lp_build_opcode_action op_actions[TGSI_OPCODE_LAST];
> +
> +   /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ),
> rsq_action
> +    * should compute 1 / sqrt (src0.x) */
> +   struct lp_build_opcode_action rsq_action;
> +
> +   const struct tgsi_shader_info *info;
> +
> +   lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
> +
> +   LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *,
> +                         LLVMValueRef, unsigned, unsigned, unsigned,
> unsigned);
> +
> +   void (*emit_store)(struct lp_build_tgsi_context *,
> +                      const struct tgsi_full_instruction *,
> +                      const struct tgsi_opcode_info *,
> +                      LLVMValueRef dst[4]);
> +
> +   void (*emit_declaration)(struct lp_build_tgsi_context *,
> +                             const struct tgsi_full_declaration
> *decl);
> +
> +   void (*emit_immediate)(struct lp_build_tgsi_context *,
> +                          const struct tgsi_full_immediate *imm);
> +
> +
> +   /* Allow the user to store data in this structure rather than
> passing it
> +    * to every function. */
> +   void * userdata;
> +
> +   boolean soa;
> +
> +   int pc;
> +
> +   struct tgsi_full_instruction *instructions;
> +   uint max_instructions;
> +   uint num_instructions;
> +
> +   /** This function allows the user to insert some instructions at
> the
> +     * beginning of the program.  It is optional and does not need
> to be
> +     * implemented.
> +     */
> +   void (*emit_prologue)(struct lp_build_tgsi_context*);
> +
> +   /** This function allows the user to insert some instructions at
> the end of
> +     * the program.  This callback is intended to be used for
> emitting
> +     * instructions to handle the export for the output registers,
> but it can
> +     * be used for any purpose.  Implementing this function is
> optional, but
> +     * recommended.
> +     */
> +   void (*emit_epilogue)(struct lp_build_tgsi_context*);
> +};
> +
> +struct lp_build_tgsi_soa_context
> +{
> +   struct lp_build_tgsi_context bld_base;
> +
> +   /* Builder for vector integer masks and indices */
> +   struct lp_build_context uint_bld;
> +
> +   /* Builder for scalar elements of shader's data type (float) */
> +   struct lp_build_context elem_bld;
> +
> +   LLVMValueRef consts_ptr;
> +   const LLVMValueRef *pos;
> +   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
> +   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
> +
> +   const struct lp_build_sampler_soa *sampler;
> +
> +   LLVMValueRef
> immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
> +   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS];
> +   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
> +   LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
> +
> +   /* We allocate/use this array of temps if (1 <<
> TGSI_FILE_TEMPORARY) is
> +    * set in the indirect_files field.
> +    * The temps[] array above is unused then.
> +    */
> +   LLVMValueRef temps_array;
> +
> +   /* We allocate/use this array of outputs if (1 <<
> TGSI_FILE_OUTPUT) is
> +    * set in the indirect_files field.
> +    * The outputs[] array above is unused then.
> +    */
> +   LLVMValueRef outputs_array;
> +
> +   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT)
> is
> +    * set in the indirect_files field.
> +    * The inputs[] array above is unused then.
> +    */
> +   LLVMValueRef inputs_array;
> +
> +   LLVMValueRef system_values_array;
> +
> +   /** bitmask indicating which register files are accessed
> indirectly */
> +   unsigned indirect_files;
> +
> +   struct lp_build_mask_context *mask;
> +   struct lp_exec_mask exec_mask;
> +
> +   uint num_immediates;
> +
> +};
> +
> +void
> +lp_emit_declaration_soa(
> +   struct lp_build_tgsi_context *bld,
> +   const struct tgsi_full_declaration *decl);
> +
> +void lp_emit_immediate_soa(
> +   struct lp_build_tgsi_context *bld_base,
> +   const struct tgsi_full_immediate *imm);
> +
> +boolean
> +lp_emit_instruction_soa(
> +   struct lp_build_tgsi_soa_context *bld,
> +   const struct tgsi_full_instruction *inst,
> +   const struct tgsi_opcode_info *info);
> +
> +
> +LLVMValueRef
> +lp_get_temp_ptr_soa(
> +   struct lp_build_tgsi_soa_context *bld,
> +   unsigned index,
> +   unsigned chan);
> +
> +LLVMValueRef
> +lp_get_output_ptr(
> +   struct lp_build_tgsi_soa_context *bld,
> +   unsigned index,
> +   unsigned chan);
> +
> +struct lp_build_tgsi_aos_context
> +{
> +   struct lp_build_tgsi_context bld_base;
> +
> +   /* Builder for integer masks and indices */
> +   struct lp_build_context int_bld;
> +
> +   /*
> +    * AoS swizzle used:
> +    * - swizzles[0] = red index
> +    * - swizzles[1] = green index
> +    * - swizzles[2] = blue index
> +    * - swizzles[3] = alpha index
> +    */
> +   unsigned char swizzles[4];
> +   unsigned char inv_swizzles[4];
> +
> +   LLVMValueRef consts_ptr;
> +   const LLVMValueRef *inputs;
> +   LLVMValueRef *outputs;
> +
> +   struct lp_build_sampler_aos *sampler;
> +
> +   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
> +   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
> +   LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
> +   LLVMValueRef preds[LP_MAX_TGSI_PREDS];
> +
> +   /* We allocate/use this array of temps if (1 <<
> TGSI_FILE_TEMPORARY) is
> +    * set in the indirect_files field.
> +    * The temps[] array above is unused then.
> +    */
> +   LLVMValueRef temps_array;
> +
> +   /** bitmask indicating which register files are accessed
> indirectly */
> +   unsigned indirect_files;
> +
> +};
> +
> +static INLINE struct lp_build_tgsi_soa_context *
> +lp_soa_context(struct lp_build_tgsi_context *bld_base)
> +{
> +   return (struct lp_build_tgsi_soa_context *)bld_base;
> +}
> +
> +static INLINE struct lp_build_tgsi_aos_context *
> +lp_aos_context(struct lp_build_tgsi_context *bld_base)
> +{
> +   return (struct lp_build_tgsi_aos_context *)bld_base;
> +}
> +
> +void
> +lp_emit_declaration_aos(
> +   struct lp_build_tgsi_aos_context *bld,
> +   const struct tgsi_full_declaration *decl);
> +
> +
> +boolean
> +lp_emit_instruction_aos(
> +   struct lp_build_tgsi_aos_context *bld,
> +   const struct tgsi_full_instruction *inst,
> +   const struct tgsi_opcode_info *info,
> +   int *pc);
> +
> +void
> +lp_emit_store_aos(
> +   struct lp_build_tgsi_aos_context *bld,
> +   const struct tgsi_full_instruction *inst,
> +   unsigned index,
> +   LLVMValueRef value);
> +
> +void lp_build_fetch_args(
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data);
> +
> +LLVMValueRef
> +lp_build_tgsi_inst_llvm_aos(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_instruction *inst);
> +
> +void
> +lp_build_tgsi_intrinsic(
> + const struct lp_build_opcode_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data);
> +
> +LLVMValueRef
> +lp_build_emit_llvm(
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode,
> +   struct lp_build_emit_data * emit_data);
> +
> +LLVMValueRef
> +lp_build_emit_llvm_unary(
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode,
> +   LLVMValueRef arg0);
> +
> +LLVMValueRef
> +lp_build_emit_llvm_binary(
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode,
> +   LLVMValueRef arg0,
> +   LLVMValueRef arg1);
> +
> +LLVMValueRef
> +lp_build_emit_llvm_ternary(
> +   struct lp_build_tgsi_context *bld_base,
> +   unsigned tgsi_opcode,
> +   LLVMValueRef arg0,
> +   LLVMValueRef arg1,
> +   LLVMValueRef arg2);
> +
> +boolean
> +lp_build_tgsi_inst_llvm(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_instruction *inst);
> +
> +LLVMValueRef
> +lp_build_emit_fetch(
> +   struct lp_build_tgsi_context *bld_base,
> +   const struct tgsi_full_instruction *inst,
> +   unsigned src_op,
> +   const unsigned chan_index);
> +
> +boolean
> +lp_build_tgsi_llvm(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_token *tokens);
> +
>  #endif /* LP_BLD_TGSI_H */
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> index a021efd..26cfffe 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> @@ -55,61 +55,15 @@
>  #include "lp_bld_flow.h"
>  #include "lp_bld_quad.h"
>  #include "lp_bld_tgsi.h"
> -#include "lp_bld_limits.h"
>  #include "lp_bld_debug.h"
>  
>  
> -#define LP_MAX_INSTRUCTIONS 256
> -
> -
> -struct lp_build_tgsi_aos_context
> -{
> -   struct lp_build_context base;
> -
> -   /* Builder for integer masks and indices */
> -   struct lp_build_context int_bld;
> -
> -   /*
> -    * AoS swizzle used:
> -    * - swizzles[0] = red index
> -    * - swizzles[1] = green index
> -    * - swizzles[2] = blue index
> -    * - swizzles[3] = alpha index
> -    */
> -   unsigned char swizzles[4];
> -   unsigned char inv_swizzles[4];
> -
> -   LLVMValueRef consts_ptr;
> -   const LLVMValueRef *inputs;
> -   LLVMValueRef *outputs;
> -
> -   struct lp_build_sampler_aos *sampler;
> -
> -   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
> -   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
> -   LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
> -   LLVMValueRef preds[LP_MAX_TGSI_PREDS];
> -
> -   /* We allocate/use this array of temps if (1 <<
> TGSI_FILE_TEMPORARY) is
> -    * set in the indirect_files field.
> -    * The temps[] array above is unused then.
> -    */
> -   LLVMValueRef temps_array;
> -
> -   /** bitmask indicating which register files are accessed
> indirectly */
> -   unsigned indirect_files;
> -
> -   struct tgsi_full_instruction *instructions;
> -   uint max_instructions;
> -};
> -
> -
>  /**
>   * Wrapper around lp_build_swizzle_aos which translates swizzles to
>   another
>   * ordering.
>   */
>  static LLVMValueRef
> -swizzle_aos(struct lp_build_tgsi_aos_context *bld,
> +swizzle_aos(struct lp_build_tgsi_context *bld_base,
>              LLVMValueRef a,
>              unsigned swizzle_x,
>              unsigned swizzle_y,
> @@ -117,6 +71,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld,
>              unsigned swizzle_w)
>  {
>     unsigned char swizzles[4];
> +   struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
>  
>     assert(swizzle_x < 4);
>     assert(swizzle_y < 4);
> @@ -128,7 +83,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld,
>     swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
>     swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
>  
> -   return lp_build_swizzle_aos(&bld->base, a, swizzles);
> +   return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
>  }
>  
>  
> @@ -138,149 +93,132 @@ swizzle_scalar_aos(struct
> lp_build_tgsi_aos_context *bld,
>                     unsigned chan)
>  {
>     chan = bld->swizzles[chan];
> -   return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
> +   return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
>  }
>  
>  
> -/**
> - * Register fetch.
> - */
>  static LLVMValueRef
> -emit_fetch(
> -   struct lp_build_tgsi_aos_context *bld,
> -   const struct tgsi_full_instruction *inst,
> -   unsigned src_op)
> +emit_fetch_constant(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> -   struct lp_type type = bld->base.type;
> -   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
> +   struct lp_build_tgsi_aos_context * bld =
> lp_aos_context(bld_base);
> +   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +   struct lp_type type = bld_base->base.type;
>     LLVMValueRef res;
>     unsigned chan;
>  
>     assert(!reg->Register.Indirect);
>  
>     /*
> -    * Fetch the from the register file.
> +    * Get the constants components
>      */
>  
> -   switch (reg->Register.File) {
> -   case TGSI_FILE_CONSTANT:
> -      /*
> -       * Get the constants components
> -       */
> -
> -      res = bld->base.undef;
> -      for (chan = 0; chan < 4; ++chan) {
> -         LLVMValueRef index;
> -         LLVMValueRef scalar_ptr;
> -         LLVMValueRef scalar;
> -         LLVMValueRef swizzle;
> -
> -         index = lp_build_const_int32(bld->base.gallivm,
> reg->Register.Index * 4 + chan);
> -
> -         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
> -                                   &index, 1, "");
> +   res = bld->bld_base.base.undef;
> +   for (chan = 0; chan < 4; ++chan) {
> +      LLVMValueRef index;
> +      LLVMValueRef scalar_ptr;
> +      LLVMValueRef scalar;
> +      LLVMValueRef swizzle;
>  
> -         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> +      index = lp_build_const_int32(bld->bld_base.base.gallivm,
> +                                   reg->Register.Index * 4 + chan);
>  
> -         lp_build_name(scalar, "const[%u].%c", reg->Register.Index,
> "xyzw"[chan]);
> +      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1,
> "");
>  
> -         /*
> -          * NOTE: constants array is always assumed to be RGBA
> -          */
> +      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
>  
> -         swizzle = lp_build_const_int32(bld->base.gallivm, chan);
> -
> -         res = LLVMBuildInsertElement(builder, res, scalar, swizzle,
> "");
> -      }
> +      lp_build_name(scalar, "const[%u].%c", reg->Register.Index,
> "xyzw"[chan]);
>  
>        /*
> -       * Broadcast the first quaternion to all others.
> -       *
> -       * XXX: could be factored into a reusable function.
> +       * NOTE: constants array is always assumed to be RGBA
>         */
>  
> -      if (type.length > 4) {
> -         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
> -         unsigned i;
> +      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
> chan);
>  
> -         for (chan = 0; chan < 4; ++chan) {
> -            shuffles[chan] = lp_build_const_int32(bld->base.gallivm,
> chan);
> -         }
> -
> -         for (i = 4; i < type.length; ++i) {
> -            shuffles[i] = shuffles[i % 4];
> -         }
> +      res = LLVMBuildInsertElement(builder, res, scalar, swizzle,
> "");
> +   }
>  
> -         res = LLVMBuildShuffleVector(builder,
> -                                      res, bld->base.undef,
> -                                      LLVMConstVector(shuffles,
> type.length),
> -                                      "");
> -      }
> -      break;
> +   /*
> +    * Broadcast the first quaternion to all others.
> +    *
> +    * XXX: could be factored into a reusable function.
> +    */
>  
> -   case TGSI_FILE_IMMEDIATE:
> -      res = bld->immediates[reg->Register.Index];
> -      assert(res);
> -      break;
> +   if (type.length > 4) {
> +      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
> +      unsigned i;
>  
> -   case TGSI_FILE_INPUT:
> -      res = bld->inputs[reg->Register.Index];
> -      assert(res);
> -      break;
> -
> -   case TGSI_FILE_TEMPORARY:
> -      {
> -         LLVMValueRef temp_ptr;
> -         temp_ptr = bld->temps[reg->Register.Index];
> -         res = LLVMBuildLoad(builder, temp_ptr, "");
> -         if (!res)
> -            return bld->base.undef;
> +      for (chan = 0; chan < 4; ++chan) {
> +         shuffles[chan] =
> lp_build_const_int32(bld->bld_base.base.gallivm, chan);
>        }
> -      break;
>  
> -   default:
> -      assert(0 && "invalid src register in emit_fetch()");
> -      return bld->base.undef;
> -   }
> -
> -   /*
> -    * Apply sign modifier.
> -    */
> +      for (i = 4; i < type.length; ++i) {
> +         shuffles[i] = shuffles[i % 4];
> +      }
>  
> -   if (reg->Register.Absolute) {
> -      res = lp_build_abs(&bld->base, res);
> +      res = LLVMBuildShuffleVector(builder,
> +                                   res, bld->bld_base.base.undef,
> +                                   LLVMConstVector(shuffles,
> type.length),
> +                                   "");
>     }
> +   return res;
> +}
>  
> -   if(reg->Register.Negate) {
> -      res = lp_build_negate(&bld->base, res);
> -   }
> +static LLVMValueRef
> +emit_fetch_immediate(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
> +{
> +   struct lp_build_tgsi_aos_context * bld =
> lp_aos_context(bld_base);
> +   LLVMValueRef res = bld->immediates[reg->Register.Index];
> +   assert(res);
> +   return res;
> +}
>  
> -   /*
> -    * Swizzle the argument
> -    */
> +static LLVMValueRef
> +emit_fetch_input(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
> +{
> +   struct lp_build_tgsi_aos_context * bld =
> lp_aos_context(bld_base);
> +   LLVMValueRef res = bld->inputs[reg->Register.Index];
> +   assert(!reg->Register.Indirect);
> +   assert(res);
> +   return res;
> +}
>  
> -   res = swizzle_aos(bld, res,
> -                     reg->Register.SwizzleX,
> -                     reg->Register.SwizzleY,
> -                     reg->Register.SwizzleZ,
> -                     reg->Register.SwizzleW);
> +static LLVMValueRef
> +emit_fetch_temporary(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
> +{
> +   struct lp_build_tgsi_aos_context * bld =
> lp_aos_context(bld_base);
> +   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +   LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
> +   LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
> +   assert(!reg->Register.Indirect);
> +   if (!res)
> +      return bld->bld_base.base.undef;
>  
>     return res;
>  }
>  
> -
>  /**
>   * Register store.
>   */
> -static void
> -emit_store(
> +void
> +lp_emit_store_aos(
>     struct lp_build_tgsi_aos_context *bld,
>     const struct tgsi_full_instruction *inst,
>     unsigned index,
>     LLVMValueRef value)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     const struct tgsi_full_dst_register *reg = &inst->Dst[index];
>     LLVMValueRef mask = NULL;
>     LLVMValueRef ptr;
> @@ -294,13 +232,13 @@ emit_store(
>        break;
>  
>     case TGSI_SAT_ZERO_ONE:
> -      value = lp_build_max(&bld->base, value, bld->base.zero);
> -      value = lp_build_min(&bld->base, value, bld->base.one);
> +      value = lp_build_max(&bld->bld_base.base, value,
> bld->bld_base.base.zero);
> +      value = lp_build_min(&bld->bld_base.base, value,
> bld->bld_base.base.one);
>        break;
>  
>     case TGSI_SAT_MINUS_PLUS_ONE:
> -      value = lp_build_max(&bld->base, value,
> lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
> -      value = lp_build_min(&bld->base, value, bld->base.one);
> +      value = lp_build_max(&bld->bld_base.base, value,
> lp_build_const_vec(bld->bld_base.base.gallivm,
> bld->bld_base.base.type, -1.0));
> +      value = lp_build_min(&bld->bld_base.base, value,
> bld->bld_base.base.one);
>        break;
>  
>     default:
> @@ -335,6 +273,8 @@ emit_store(
>        return;
>     }
>  
> +   if (!ptr)
> +      return;
>     /*
>      * Predicate
>      */
> @@ -350,17 +290,17 @@ emit_store(
>        /*
>         * Convert the value to an integer mask.
>         */
> -      pred = lp_build_compare(bld->base.gallivm,
> -                               bld->base.type,
> +      pred = lp_build_compare(bld->bld_base.base.gallivm,
> +                               bld->bld_base.base.type,
>                                 PIPE_FUNC_NOTEQUAL,
>                                 pred,
> -                               bld->base.zero);
> +                               bld->bld_base.base.zero);
>  
>        if (inst->Predicate.Negate) {
>           pred = LLVMBuildNot(builder, pred, "");
>        }
>  
> -      pred = swizzle_aos(bld, pred,
> +      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
>                           inst->Predicate.SwizzleX,
>                           inst->Predicate.SwizzleY,
>                           inst->Predicate.SwizzleZ,
> @@ -380,7 +320,7 @@ emit_store(
>     if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
>        LLVMValueRef writemask;
>  
> -      writemask = lp_build_const_mask_aos(bld->base.gallivm,
> bld->base.type,
> +      writemask =
> lp_build_const_mask_aos(bld->bld_base.base.gallivm,
> bld->bld_base.base.type,
>                                            reg->Register.WriteMask);
>  
>        if (mask) {
> @@ -394,7 +334,7 @@ emit_store(
>        LLVMValueRef orig_value;
>  
>        orig_value = LLVMBuildLoad(builder, ptr, "");
> -      value = lp_build_select(&bld->base,
> +      value = lp_build_select(&bld->bld_base.base,
>                                mask, value, orig_value);
>     }
>  
> @@ -419,44 +359,44 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
>  
>     if (!bld->sampler) {
>        _debug_printf("warning: found texture instruction but no
>        sampler generator supplied\n");
> -      return bld->base.undef;
> +      return bld->bld_base.base.undef;
>     }
>  
>     target = inst->Texture.Texture;
>  
> -   coords = emit_fetch( bld, inst, 0 );
> +   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 ,
> LP_CHAN_ALL);
>  
>     if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
> -      ddx = emit_fetch( bld, inst, 1 );
> -      ddy = emit_fetch( bld, inst, 2 );
> +      ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 ,
> LP_CHAN_ALL);
> +      ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 ,
> LP_CHAN_ALL);
>        unit = inst->Src[3].Register.Index;
>     }  else {
>  #if 0
> -      ddx = lp_build_ddx( &bld->base, coords );
> -      ddy = lp_build_ddy( &bld->base, coords );
> +      ddx = lp_build_ddx( &bld->bld_base.base, coords );
> +      ddy = lp_build_ddy( &bld->bld_base.base, coords );
>  #else
>        /* TODO */
> -      ddx = bld->base.one;
> -      ddy = bld->base.one;
> +      ddx = bld->bld_base.base.one;
> +      ddy = bld->bld_base.base.one;
>  #endif
>        unit = inst->Src[1].Register.Index;
>     }
>  
>     return bld->sampler->emit_fetch_texel(bld->sampler,
> -                                         &bld->base,
> +                                         &bld->bld_base.base,
>                                           target, unit,
>                                           coords, ddx, ddy,
>                                           modifier);
>  }
>  
>  
> -static void
> -emit_declaration(
> +void
> +lp_emit_declaration_aos(
>     struct lp_build_tgsi_aos_context *bld,
>     const struct tgsi_full_declaration *decl)
>  {
> -   struct gallivm_state *gallivm = bld->base.gallivm;
> -   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm,
> bld->base.type);
> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> +   LLVMTypeRef vec_type =
> lp_build_vec_type(bld->bld_base.base.gallivm,
> bld->bld_base.base.type);
>  
>     unsigned first = decl->Range.First;
>     unsigned last = decl->Range.Last;
> @@ -468,7 +408,7 @@ emit_declaration(
>           assert(idx < LP_MAX_TGSI_TEMPS);
>           if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
>              LLVMValueRef array_size = lp_build_const_int32(gallivm,
>              last + 1);
> -            bld->temps_array =
> lp_build_array_alloca(bld->base.gallivm,
> +            bld->temps_array =
> lp_build_array_alloca(bld->bld_base.base.gallivm,
>                                                       vec_type,
>                                                       array_size,
>                                                       "");
>           } else {
>              bld->temps[idx] = lp_build_alloca(gallivm, vec_type,
>              "");
> @@ -501,8 +441,8 @@ emit_declaration(
>   * Emit LLVM for one TGSI instruction.
>   * \param return TRUE for success, FALSE otherwise
>   */
> -static boolean
> -emit_instruction(
> +boolean
> +lp_emit_instruction_aos(
>     struct lp_build_tgsi_aos_context *bld,
>     const struct tgsi_full_instruction *inst,
>     const struct tgsi_opcode_info *info,
> @@ -527,17 +467,17 @@ emit_instruction(
>  
>     assert(info->num_dst <= 1);
>     if (info->num_dst) {
> -      dst0 = bld->base.undef;
> +      dst0 = bld->bld_base.base.undef;
>     }
>  
>     switch (inst->Instruction.Opcode) {
>     case TGSI_OPCODE_ARL:
> -      src0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_floor(&bld->base, src0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      dst0 = lp_build_floor(&bld->bld_base.base, src0);
>        break;
>  
>     case TGSI_OPCODE_MOV:
> -      dst0 = emit_fetch(bld, inst, 0);
> +      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
>        break;
>  
>     case TGSI_OPCODE_LIT:
> @@ -545,15 +485,15 @@ emit_instruction(
>  
>     case TGSI_OPCODE_RCP:
>     /* TGSI_OPCODE_RECIP */
> -      src0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_rcp(&bld->base, src0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
>        break;
>  
>     case TGSI_OPCODE_RSQ:
>     /* TGSI_OPCODE_RECIPSQRT */
> -      src0 = emit_fetch(bld, inst, 0);
> -      tmp0 = lp_build_abs(&bld->base, src0);
> -      dst0 = lp_build_rsqrt(&bld->base, tmp0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_emit_llvm_unary(&bld->bld_base,
> TGSI_OPCODE_ABS, src0);
> +      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
>        break;
>  
>     case TGSI_OPCODE_EXP:
> @@ -563,15 +503,15 @@ emit_instruction(
>        return FALSE;
>  
>     case TGSI_OPCODE_MUL:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      dst0 = lp_build_mul(&bld->base, src0, src1);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
>        break;
>  
>     case TGSI_OPCODE_ADD:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      dst0 = lp_build_add(&bld->base, src0, src1);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
>        break;
>  
>     case TGSI_OPCODE_DP3:
> @@ -586,121 +526,116 @@ emit_instruction(
>        return FALSE;
>  
>     case TGSI_OPCODE_MIN:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      dst0 = lp_build_max(&bld->base, src0, src1);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
>        break;
>  
>     case TGSI_OPCODE_MAX:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      dst0 = lp_build_max(&bld->base, src0, src1);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
>        break;
>  
>     case TGSI_OPCODE_SLT:
>     /* TGSI_OPCODE_SETLT */
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
> -      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0,
> src1);
> +      dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
>        break;
>  
>     case TGSI_OPCODE_SGE:
>     /* TGSI_OPCODE_SETGE */
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
> -      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL,
> src0, src1);
> +      dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
>        break;
>  
>     case TGSI_OPCODE_MAD:
>     /* TGSI_OPCODE_MADD */
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      src2 = emit_fetch(bld, inst, 2);
> -      tmp0 = lp_build_mul(&bld->base, src0, src1);
> -      dst0 = lp_build_add(&bld->base, tmp0, src2);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
> +      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
>        break;
>  
>     case TGSI_OPCODE_SUB:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      dst0 = lp_build_sub(&bld->base, src0, src1);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
>        break;
>  
>     case TGSI_OPCODE_LRP:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      src2 = emit_fetch(bld, inst, 2);
> -      tmp0 = lp_build_sub(&bld->base, src1, src2);
> -      tmp0 = lp_build_mul(&bld->base, src0, tmp0);
> -      dst0 = lp_build_add(&bld->base, tmp0, src2);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
> +      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
> +      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
>        break;
>  
>     case TGSI_OPCODE_CND:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      src2 = emit_fetch(bld, inst, 2);
> -      tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type,
> 0.5);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2,
> tmp1);
> -      dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> +      tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm,
> bld->bld_base.base.type, 0.5);
> +      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER,
> src2, tmp1);
> +      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
>        break;
>  
>     case TGSI_OPCODE_DP2A:
>        return FALSE;
>  
>     case TGSI_OPCODE_FRC:
> -      src0 = emit_fetch(bld, inst, 0);
> -      tmp0 = lp_build_floor(&bld->base, src0);
> -      dst0 = lp_build_sub(&bld->base, src0, tmp0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
> +      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
>        break;
>  
>     case TGSI_OPCODE_CLAMP:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      src2 = emit_fetch(bld, inst, 2);
> -      tmp0 = lp_build_max(&bld->base, src0, src1);
> -      dst0 = lp_build_min(&bld->base, tmp0, src2);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
> +      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
>        break;
>  
>     case TGSI_OPCODE_FLR:
> -      src0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_floor(&bld->base, src0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      dst0 = lp_build_floor(&bld->bld_base.base, src0);
>        break;
>  
>     case TGSI_OPCODE_ROUND:
> -      src0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_round(&bld->base, src0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      dst0 = lp_build_round(&bld->bld_base.base, src0);
>        break;
>  
>     case TGSI_OPCODE_EX2:
> -      src0 = emit_fetch(bld, inst, 0);
> -      tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0,
> TGSI_SWIZZLE_X);
> -      dst0 = lp_build_exp2(&bld->base, tmp0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0,
> TGSI_SWIZZLE_X);
> +      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
>        break;
>  
>     case TGSI_OPCODE_LG2:
> -      src0 = emit_fetch(bld, inst, 0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
>        tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
> -      dst0 = lp_build_log2(&bld->base, tmp0);
> +      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
>        break;
>  
>     case TGSI_OPCODE_POW:
> -      src0 = emit_fetch(bld, inst, 0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
>        src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
> -      src1 = emit_fetch(bld, inst, 1);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
>        src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
> -      dst0 = lp_build_pow(&bld->base, src0, src1);
> +      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
>        break;
>  
>     case TGSI_OPCODE_XPD:
>        return FALSE;
>  
> -   case TGSI_OPCODE_ABS:
> -      src0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_abs(&bld->base, src0);
> -      break;
> -
>     case TGSI_OPCODE_RCC:
>        /* deprecated? */
>        assert(0);
> @@ -710,9 +645,9 @@ emit_instruction(
>        return FALSE;
>  
>     case TGSI_OPCODE_COS:
> -      src0 = emit_fetch(bld, inst, 0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
>        tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
> -      dst0 = lp_build_cos(&bld->base, tmp0);
> +      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
>        break;
>  
>     case TGSI_OPCODE_DDX:
> @@ -748,45 +683,45 @@ emit_instruction(
>        return FALSE;
>  
>     case TGSI_OPCODE_SEQ:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
> -      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL,
> src0, src1);
> +      dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
>        break;
>  
>     case TGSI_OPCODE_SFL:
> -      dst0 = bld->base.zero;
> +      dst0 = bld->bld_base.base.zero;
>        break;
>  
>     case TGSI_OPCODE_SGT:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0,
> src1);
> -      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER,
> src0, src1);
> +      dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
>        break;
>  
>     case TGSI_OPCODE_SIN:
> -      src0 = emit_fetch(bld, inst, 0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
>        tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
> -      dst0 = lp_build_sin(&bld->base, tmp0);
> +      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
>        break;
>  
>     case TGSI_OPCODE_SLE:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
> -      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL,
> src0, src1);
> +      dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
>        break;
>  
>     case TGSI_OPCODE_SNE:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0,
> src1);
> -      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one,
> bld->base.zero);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL,
> src0, src1);
> +      dst0 = lp_build_select(&bld->bld_base.base, tmp0,
> bld->bld_base.base.one, bld->bld_base.base.zero);
>        break;
>  
>     case TGSI_OPCODE_STR:
> -      dst0 = bld->base.one;
> +      dst0 = bld->bld_base.base.one;
>        break;
>  
>     case TGSI_OPCODE_TEX:
> @@ -834,8 +769,8 @@ emit_instruction(
>        break;
>  
>     case TGSI_OPCODE_ARR:
> -      src0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_round(&bld->base, src0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      dst0 = lp_build_round(&bld->bld_base.base, src0);
>        break;
>  
>     case TGSI_OPCODE_BRA:
> @@ -856,16 +791,16 @@ emit_instruction(
>  
>     case TGSI_OPCODE_SSG:
>     /* TGSI_OPCODE_SGN */
> -      tmp0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_sgn(&bld->base, tmp0);
> +      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
>        break;
>  
>     case TGSI_OPCODE_CMP:
> -      src0 = emit_fetch(bld, inst, 0);
> -      src1 = emit_fetch(bld, inst, 1);
> -      src2 = emit_fetch(bld, inst, 2);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0,
> bld->base.zero);
> -      dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1,
> LP_CHAN_ALL);
> +      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2,
> LP_CHAN_ALL);
> +      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0,
> bld->bld_base.base.zero);
> +      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
>        break;
>  
>     case TGSI_OPCODE_SCS:
> @@ -934,8 +869,8 @@ emit_instruction(
>        break;
>  
>     case TGSI_OPCODE_CEIL:
> -      src0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_ceil(&bld->base, src0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
>        break;
>  
>     case TGSI_OPCODE_I2F:
> @@ -951,8 +886,8 @@ emit_instruction(
>        break;
>  
>     case TGSI_OPCODE_TRUNC:
> -      src0 = emit_fetch(bld, inst, 0);
> -      dst0 = lp_build_trunc(&bld->base, src0);
> +      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0,
> LP_CHAN_ALL);
> +      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
>        break;
>  
>     case TGSI_OPCODE_SHL:
> @@ -1028,7 +963,7 @@ emit_instruction(
>     }
>     
>     if (info->num_dst) {
> -      emit_store(bld, inst, 0, dst0);
> +      lp_emit_store_aos(bld, inst, 0, dst0);
>     }
>  
>     return TRUE;
> @@ -1049,13 +984,12 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
>     struct lp_build_tgsi_aos_context bld;
>     struct tgsi_parse_context parse;
>     uint num_immediates = 0;
> -   uint num_instructions = 0;
>     unsigned chan;
>     int pc = 0;
>  
>     /* Setup build context */
>     memset(&bld, 0, sizeof bld);
> -   lp_build_context_init(&bld.base, gallivm, type);
> +   lp_build_context_init(&bld.bld_base.base, gallivm, type);
>     lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
>  
>     for (chan = 0; chan < 4; ++chan) {
> @@ -1068,11 +1002,17 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
>     bld.consts_ptr = consts_ptr;
>     bld.sampler = sampler;
>     bld.indirect_files = info->indirect_files;
> -   bld.instructions = (struct tgsi_full_instruction *)
> -                      MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct
> tgsi_full_instruction));
> -   bld.max_instructions = LP_MAX_INSTRUCTIONS;
> +   bld.bld_base.emit_swizzle = swizzle_aos;
> +
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] =
> emit_fetch_constant;
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] =
> emit_fetch_immediate;
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] =
> emit_fetch_input;
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] =
> emit_fetch_temporary;
>  
> -   if (!bld.instructions) {
> +   /* Set opcode actions */
> +   lp_set_default_actions_cpu(&bld.bld_base);
> +
> +   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
>        return;
>     }
>  
> @@ -1084,33 +1024,13 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
>        switch(parse.FullToken.Token.Type) {
>        case TGSI_TOKEN_TYPE_DECLARATION:
>           /* Inputs already interpolated */
> -         emit_declaration(&bld, &parse.FullToken.FullDeclaration);
> +         lp_emit_declaration_aos(&bld,
> &parse.FullToken.FullDeclaration);
>           break;
>  
>        case TGSI_TOKEN_TYPE_INSTRUCTION:
> -         {
> -            /* save expanded instruction */
> -            if (num_instructions == bld.max_instructions) {
> -               struct tgsi_full_instruction *instructions;
> -               instructions = REALLOC(bld.instructions,
> -                                      bld.max_instructions
> -                                      * sizeof(struct
> tgsi_full_instruction),
> -                                      (bld.max_instructions +
> LP_MAX_INSTRUCTIONS)
> -                                      * sizeof(struct
> tgsi_full_instruction));
> -               if (!instructions) {
> -                  break;
> -               }
> -               bld.instructions = instructions;
> -               bld.max_instructions += LP_MAX_INSTRUCTIONS;
> -            }
> -
> -            memcpy(bld.instructions + num_instructions,
> -                   &parse.FullToken.FullInstruction,
> -                   sizeof(bld.instructions[0]));
> -
> -            num_instructions++;
> -         }
> -
> +         /* save expanded instruction */
> +         lp_bld_tgsi_add_instruction(&bld.bld_base,
> +                                     &parse.FullToken.FullInstruction);
>           break;
>  
>        case TGSI_TOKEN_TYPE_IMMEDIATE:
> @@ -1144,10 +1064,10 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
>     }
>  
>     while (pc != -1) {
> -      struct tgsi_full_instruction *instr = bld.instructions + pc;
> +      struct tgsi_full_instruction *instr =
> bld.bld_base.instructions + pc;
>        const struct tgsi_opcode_info *opcode_info =
>           tgsi_get_opcode_info(instr->Instruction.Opcode);
> -      if (!emit_instruction(&bld, instr, opcode_info, &pc))
> +      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
>           _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
>                         opcode_info->mnemonic);
>     }
> @@ -1168,6 +1088,5 @@ lp_build_tgsi_aos(struct gallivm_state
> *gallivm,
>        LLVMDumpModule(module);
>     }
>  
> -   FREE(bld.instructions);
>  }
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 1ad0b74..f7e15db 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -47,6 +47,7 @@
>  #include "tgsi/tgsi_parse.h"
>  #include "tgsi/tgsi_util.h"
>  #include "tgsi/tgsi_scan.h"
> +#include "lp_bld_action.h"
>  #include "lp_bld_type.h"
>  #include "lp_bld_const.h"
>  #include "lp_bld_arit.h"
> @@ -63,97 +64,6 @@
>  #include "lp_bld_printf.h"
>  
>  
> -#define NUM_CHANNELS 4
> -
> -#define LP_MAX_INSTRUCTIONS 256
> -
> -
> -struct lp_exec_mask {
> -   struct lp_build_context *bld;
> -
> -   boolean has_mask;
> -
> -   LLVMTypeRef int_vec_type;
> -
> -   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
> -   int cond_stack_size;
> -   LLVMValueRef cond_mask;
> -
> -   LLVMBasicBlockRef loop_block;
> -   LLVMValueRef cont_mask;
> -   LLVMValueRef break_mask;
> -   LLVMValueRef break_var;
> -   struct {
> -      LLVMBasicBlockRef loop_block;
> -      LLVMValueRef cont_mask;
> -      LLVMValueRef break_mask;
> -      LLVMValueRef break_var;
> -   } loop_stack[LP_MAX_TGSI_NESTING];
> -   int loop_stack_size;
> -
> -   LLVMValueRef ret_mask;
> -   struct {
> -      int pc;
> -      LLVMValueRef ret_mask;
> -   } call_stack[LP_MAX_TGSI_NESTING];
> -   int call_stack_size;
> -
> -   LLVMValueRef exec_mask;
> -};
> -
> -struct lp_build_tgsi_soa_context
> -{
> -   struct lp_build_context base;
> -
> -   /* Builder for vector integer masks and indices */
> -   struct lp_build_context uint_bld;
> -
> -   /* Builder for scalar elements of shader's data type (float) */
> -   struct lp_build_context elem_bld;
> -
> -   LLVMValueRef consts_ptr;
> -   const LLVMValueRef *pos;
> -   const LLVMValueRef (*inputs)[NUM_CHANNELS];
> -   LLVMValueRef (*outputs)[NUM_CHANNELS];
> -
> -   const struct lp_build_sampler_soa *sampler;
> -
> -   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
> -   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
> -   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
> -   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
> -
> -   /* We allocate/use this array of temps if (1 <<
> TGSI_FILE_TEMPORARY) is
> -    * set in the indirect_files field.
> -    * The temps[] array above is unused then.
> -    */
> -   LLVMValueRef temps_array;
> -
> -   /* We allocate/use this array of output if (1 <<
> TGSI_FILE_OUTPUT) is
> -    * set in the indirect_files field.
> -    * The outputs[] array above is unused then.
> -    */
> -   LLVMValueRef outputs_array;
> -
> -   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT)
> is
> -    * set in the indirect_files field.
> -    * The inputs[] array above is unused then.
> -    */
> -   LLVMValueRef inputs_array;
> -
> -   LLVMValueRef system_values_array;
> -
> -   const struct tgsi_shader_info *info;
> -   /** bitmask indicating which register files are accessed
> indirectly */
> -   unsigned indirect_files;
> -
> -   struct lp_build_mask_context *mask;
> -   struct lp_exec_mask exec_mask;
> -
> -   struct tgsi_full_instruction *instructions;
> -   uint max_instructions;
> -};
> -
>  static void lp_exec_mask_init(struct lp_exec_mask *mask, struct
>  lp_build_context *bld)
>  {
>     mask->bld = bld;
> @@ -438,15 +348,15 @@ static void lp_exec_mask_endsub(struct
> lp_exec_mask *mask, int *pc)
>   * \param index  which temporary register
>   * \param chan  which channel of the temp register.
>   */
> -static LLVMValueRef
> -get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
> +LLVMValueRef
> +lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
>               unsigned index,
>               unsigned chan)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     assert(chan < 4);
>     if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> -      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
> index * 4 + chan);
> +      LLVMValueRef lindex =
> lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
>        return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1,
>        "");
>     }
>     else {
> @@ -460,15 +370,15 @@ get_temp_ptr(struct lp_build_tgsi_soa_context
> *bld,
>   * \param index  which output register
>   * \param chan  which channel of the output register.
>   */
> -static LLVMValueRef
> -get_output_ptr(struct lp_build_tgsi_soa_context *bld,
> +LLVMValueRef
> +lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
>                 unsigned index,
>                 unsigned chan)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     assert(chan < 4);
>     if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> -      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
> +      LLVMValueRef lindex =
> lp_build_const_int32(bld->bld_base.base.gallivm,
>                                                   index * 4 + chan);
>        return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1,
>        "");
>     }
> @@ -487,15 +397,15 @@ build_gather(struct lp_build_tgsi_soa_context
> *bld,
>               LLVMValueRef base_ptr,
>               LLVMValueRef indexes)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> -   LLVMValueRef res = bld->base.undef;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> +   LLVMValueRef res = bld->bld_base.base.undef;
>     unsigned i;
>  
>     /*
>      * Loop over elements of index_vec, load scalar value, insert it
>      into 'res'.
>      */
> -   for (i = 0; i < bld->base.type.length; i++) {
> -      LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
> +   for (i = 0; i < bld->bld_base.base.type.length; i++) {
> +      LLVMValueRef ii =
> lp_build_const_int32(bld->bld_base.base.gallivm, i);
>        LLVMValueRef index = LLVMBuildExtractElement(builder,
>                                                     indexes, ii, "");
>        LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
> @@ -520,7 +430,7 @@ emit_mask_scatter(struct
> lp_build_tgsi_soa_context *bld,
>                    struct lp_exec_mask *mask,
>                    LLVMValueRef pred)
>  {
> -   struct gallivm_state *gallivm = bld->base.gallivm;
> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
>     LLVMBuilderRef builder = gallivm->builder;
>     unsigned i;
>  
> @@ -537,7 +447,7 @@ emit_mask_scatter(struct
> lp_build_tgsi_soa_context *bld,
>     /*
>      * Loop over elements of index_vec, store scalar value.
>      */
> -   for (i = 0; i < bld->base.type.length; i++) {
> +   for (i = 0; i < bld->bld_base.base.type.length; i++) {
>        LLVMValueRef ii = lp_build_const_int32(gallivm, i);
>        LLVMValueRef index = LLVMBuildExtractElement(builder, indexes,
>        ii, "");
>        LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
>        &index, 1, "scatter_ptr");
> @@ -573,7 +483,7 @@ get_indirect_index(struct
> lp_build_tgsi_soa_context *bld,
>                     unsigned reg_file, unsigned reg_index,
>                     const struct tgsi_src_register *indirect_reg)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     struct lp_build_context *uint_bld = &bld->uint_bld;
>     /* always use X component of address register */
>     unsigned swizzle = indirect_reg->SwizzleX;
> @@ -584,7 +494,7 @@ get_indirect_index(struct
> lp_build_tgsi_soa_context *bld,
>  
>     assert(bld->indirect_files & (1 << reg_file));
>  
> -   base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
> reg_index);
> +   base = lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type, reg_index);
>  
>     assert(swizzle < 4);
>     rel = LLVMBuildLoad(builder,
> @@ -598,9 +508,9 @@ get_indirect_index(struct
> lp_build_tgsi_soa_context *bld,
>  
>     index = lp_build_add(uint_bld, base, rel);
>  
> -   max_index = lp_build_const_int_vec(bld->base.gallivm,
> +   max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
>                                        uint_bld->type,
> -                                      bld->info->file_max[reg_file]);
> +                                      bld->bld_base.info->file_max[reg_file]);
>  
>     assert(!uint_bld->type.sign);
>     index = lp_build_min(uint_bld, index, max_index);
> @@ -608,176 +518,198 @@ get_indirect_index(struct
> lp_build_tgsi_soa_context *bld,
>     return index;
>  }
>  
> -
> -/**
> - * Register fetch.
> - */
>  static LLVMValueRef
> -emit_fetch(
> -   struct lp_build_tgsi_soa_context *bld,
> -   const struct tgsi_full_instruction *inst,
> -   unsigned src_op,
> -   const unsigned chan_index )
> +emit_fetch_constant(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
>  {
> -   struct gallivm_state *gallivm = bld->base.gallivm;
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +   struct gallivm_state *gallivm = bld_base->base.gallivm;
>     LLVMBuilderRef builder = gallivm->builder;
>     struct lp_build_context *uint_bld = &bld->uint_bld;
> -   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
> -   const unsigned swizzle =
> -      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
> -   LLVMValueRef res;
>     LLVMValueRef indirect_index = NULL;
>  
> -   if (swizzle > 3) {
> -      assert(0 && "invalid swizzle in emit_fetch()");
> -      return bld->base.undef;
> -   }
> +   /* XXX: Handle fetching xyzw components as a vector */
> +   assert(swizzle != ~0);
>  
>     if (reg->Register.Indirect) {
>        indirect_index = get_indirect_index(bld,
>                                            reg->Register.File,
>                                            reg->Register.Index,
>                                            &reg->Indirect);
> -   } else {
> -      assert(reg->Register.Index <=
> bld->info->file_max[reg->Register.File]);
>     }
>  
> -   switch (reg->Register.File) {
> -   case TGSI_FILE_CONSTANT:
> -      if (reg->Register.Indirect) {
> -         LLVMValueRef swizzle_vec =
> -            lp_build_const_int_vec(bld->base.gallivm,
> uint_bld->type, swizzle);
> -         LLVMValueRef index_vec;  /* index into the const buffer */
> +   if (reg->Register.Indirect) {
> +      LLVMValueRef swizzle_vec =
> +         lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type, swizzle);
> +      LLVMValueRef index_vec;  /* index into the const buffer */
>  
> -         /* index_vec = indirect_index * 4 + swizzle */
> -         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> -         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> +      /* index_vec = indirect_index * 4 + swizzle */
> +      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> +      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
>  
> -         /* Gather values from the constant buffer */
> -         res = build_gather(bld, bld->consts_ptr, index_vec);
> -      }
> -      else {
> -         LLVMValueRef index;  /* index into the const buffer */
> -         LLVMValueRef scalar, scalar_ptr;
> +      /* Gather values from the constant buffer */
> +      return build_gather(bld, bld->consts_ptr, index_vec);
> +   }
> +   else {
> +      LLVMValueRef index;  /* index into the const buffer */
> +      LLVMValueRef scalar, scalar_ptr;
>  
> -         index = lp_build_const_int32(gallivm, reg->Register.Index*4
> + swizzle);
> +      index = lp_build_const_int32(gallivm, reg->Register.Index*4 +
> swizzle);
>  
> -         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
> +      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
>                                     &index, 1, "");
> -         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> +      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
>  
> -         res = lp_build_broadcast_scalar(&bld->base, scalar);
> -      }
> -      break;
> +      return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
> +   }
> +}
>  
> -   case TGSI_FILE_IMMEDIATE:
> -      res = bld->immediates[reg->Register.Index][swizzle];
> -      assert(res);
> -      break;
> +static LLVMValueRef
> +emit_fetch_immediate(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +   LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
> +   assert(res);
> +   return res;
> +}
>  
> -   case TGSI_FILE_INPUT:
> -      if (reg->Register.Indirect) {
> -         LLVMValueRef swizzle_vec =
> -            lp_build_const_int_vec(gallivm, uint_bld->type,
> swizzle);
> -         LLVMValueRef length_vec =
> -            lp_build_const_int_vec(gallivm, uint_bld->type,
> bld->base.type.length);
> -         LLVMValueRef index_vec;  /* index into the const buffer */
> -         LLVMValueRef inputs_array;
> -         LLVMTypeRef float4_ptr_type;
> +static LLVMValueRef
> +emit_fetch_input(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> +   LLVMBuilderRef builder = gallivm->builder;
> +   struct lp_build_context *uint_bld = &bld->uint_bld;
> +   LLVMValueRef indirect_index = NULL;
> +   LLVMValueRef res;
>  
> -         /* index_vec = (indirect_index * 4 + swizzle) * length */
> -         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> -         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> -         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> +   if (reg->Register.Indirect) {
> +      indirect_index = get_indirect_index(bld,
> +                                          reg->Register.File,
> +                                          reg->Register.Index,
> +                                          &reg->Indirect);
> +   }
>  
> -         /* cast inputs_array pointer to float* */
> -         float4_ptr_type =
> LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
> -         inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
> +   if (reg->Register.Indirect) {
> +      LLVMValueRef swizzle_vec =
> +         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
> +      LLVMValueRef length_vec =
> +         lp_build_const_int_vec(gallivm, uint_bld->type,
> bld->bld_base.base.type.length);
> +      LLVMValueRef index_vec;  /* index into the const buffer */
> +      LLVMValueRef inputs_array;
> +      LLVMTypeRef float4_ptr_type;
> +
> +      /* index_vec = (indirect_index * 4 + swizzle) * length */
> +      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> +      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> +      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> +
> +      /* cast inputs_array pointer to float* */
> +      float4_ptr_type =
> LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
> +      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
>                                           float4_ptr_type, "");
>  
> -         /* Gather values from the temporary register array */
> -         res = build_gather(bld, inputs_array, index_vec);
> -      } else {
> -         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
> -            LLVMValueRef lindex = lp_build_const_int32(gallivm,
> -                                           reg->Register.Index * 4 +
> swizzle);
> -            LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
> -
>                                                   bld->inputs_array,
> &lindex, 1, "");
> -            res = LLVMBuildLoad(builder, input_ptr, "");
> -         }
> -         else {
> -            res = bld->inputs[reg->Register.Index][swizzle];
> -         }
> -      }
> -      assert(res);
> -      break;
> -
> -   case TGSI_FILE_TEMPORARY:
> -      if (reg->Register.Indirect) {
> -         LLVMValueRef swizzle_vec =
> -            lp_build_const_int_vec(bld->base.gallivm,
> uint_bld->type, swizzle);
> -         LLVMValueRef length_vec =
> -            lp_build_const_int_vec(bld->base.gallivm,
> uint_bld->type,
> -                                   bld->base.type.length);
> -         LLVMValueRef index_vec;  /* index into the const buffer */
> -         LLVMValueRef temps_array;
> -         LLVMTypeRef float4_ptr_type;
> -
> -         /* index_vec = (indirect_index * 4 + swizzle) * length */
> -         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> -         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> -         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> -
> -         /* cast temps_array pointer to float* */
> -         float4_ptr_type =
> LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context),
> 0);
> -         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
> -                                        float4_ptr_type, "");
> -
> -         /* Gather values from the temporary register array */
> -         res = build_gather(bld, temps_array, index_vec);
> +      /* Gather values from the input register array */
> +      res = build_gather(bld, inputs_array, index_vec);
> +   } else {
> +      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
> +         LLVMValueRef lindex = lp_build_const_int32(gallivm,
> +                                        reg->Register.Index * 4 +
> swizzle);
> +         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
> +                                                bld->inputs_array,
> &lindex, 1, "");
> +         res = LLVMBuildLoad(builder, input_ptr, "");
>        }
>        else {
> -         LLVMValueRef temp_ptr;
> -         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
> -         res = LLVMBuildLoad(builder, temp_ptr, "");
> -         if (!res)
> -            return bld->base.undef;
> +         res = bld->inputs[reg->Register.Index][swizzle];
>        }
> -      break;
> -
> -   case TGSI_FILE_SYSTEM_VALUE:
> -      assert(!reg->Register.Indirect);
> -      {
> -         LLVMValueRef index;  /* index into the system value array
> */
> -         LLVMValueRef scalar, scalar_ptr;
> -
> -         index = lp_build_const_int32(gallivm,
> -                                      reg->Register.Index * 4 +
> swizzle);
> -
> -         scalar_ptr = LLVMBuildGEP(builder,
> bld->system_values_array,
> -                                   &index, 1, "");
> -         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> +   }
> +   assert(res);
> +   return res;
> +}
>  
> -         res = lp_build_broadcast_scalar(&bld->base, scalar);
> -      }
> -      break;
> +static LLVMValueRef
> +emit_fetch_temporary(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> +   LLVMBuilderRef builder = gallivm->builder;
> +   struct lp_build_context *uint_bld = &bld->uint_bld;
> +   LLVMValueRef indirect_index = NULL;
> +   LLVMValueRef res;
>  
> -   default:
> -      assert(0 && "invalid src register in emit_fetch()");
> -      return bld->base.undef;
> +   if (reg->Register.Indirect) {
> +      indirect_index = get_indirect_index(bld,
> +                                          reg->Register.File,
> +                                          reg->Register.Index,
> +                                          &reg->Indirect);
>     }
>  
> -   if (reg->Register.Absolute) {
> -      res = lp_build_abs( &bld->base, res );
> +   if (reg->Register.Indirect) {
> +      LLVMValueRef swizzle_vec =
> +         lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type, swizzle);
> +      LLVMValueRef length_vec =
> +         lp_build_const_int_vec(bld->bld_base.base.gallivm,
> uint_bld->type,
> +                                bld->bld_base.base.type.length);
> +      LLVMValueRef index_vec;  /* index into the temps array */
> +      LLVMValueRef temps_array;
> +      LLVMTypeRef float4_ptr_type;
> +
> +      /* index_vec = (indirect_index * 4 + swizzle) * length */
> +      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
> +      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
> +      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
> +
> +      /* cast temps_array pointer to float* */
> +      float4_ptr_type =
> LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context),
> 0);
> +      temps_array = LLVMBuildBitCast(builder, bld->temps_array,
> +                                     float4_ptr_type, "");
> +
> +      /* Gather values from the temporary register array */
> +      res = build_gather(bld, temps_array, index_vec);
>     }
> -
> -   if (reg->Register.Negate) {
> -      res = lp_build_negate( &bld->base, res );
> +   else {
> +      LLVMValueRef temp_ptr;
> +      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
> swizzle);
> +      res = LLVMBuildLoad(builder, temp_ptr, "");
> +      if (!res)
> +         return bld->bld_base.base.undef;
>     }
>  
>     return res;
>  }
>  
> +static LLVMValueRef
> +emit_fetch_system_value(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_src_register * reg,
> +   const unsigned swizzle)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> +   LLVMBuilderRef builder = gallivm->builder;
> +   LLVMValueRef index;  /* index into the system value array */
> +   LLVMValueRef scalar, scalar_ptr;
> +
> +   assert(!reg->Register.Indirect);
> +
> +   index = lp_build_const_int32(gallivm, reg->Register.Index * 4 +
> swizzle);
> +
> +   scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array,
> &index, 1, "");
> +   scalar = LLVMBuildLoad(builder, scalar_ptr, "");
> +
> +   return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
> +}
>  
>  /**
>   * Register fetch with derivatives.
> @@ -785,27 +717,21 @@ emit_fetch(
>  static void
>  emit_fetch_deriv(
>     struct lp_build_tgsi_soa_context *bld,
> -   const struct tgsi_full_instruction *inst,
> -   unsigned index,
> -   const unsigned chan_index,
> +   LLVMValueRef src,
>     LLVMValueRef *res,
>     LLVMValueRef *ddx,
>     LLVMValueRef *ddy)
>  {
> -   LLVMValueRef src;
> -
> -   src = emit_fetch(bld, inst, index, chan_index);
> -
>     if(res)
>        *res = src;
>  
>     /* TODO: use interpolation coeffs for inputs */
>  
>     if(ddx)
> -      *ddx = lp_build_ddx(&bld->base, src);
> +      *ddx = lp_build_ddx(&bld->bld_base.base, src);
>  
>     if(ddy)
> -      *ddy = lp_build_ddy(&bld->base, src);
> +      *ddy = lp_build_ddy(&bld->bld_base.base, src);
>  }
>  
>  
> @@ -818,7 +744,7 @@ emit_fetch_predicate(
>     const struct tgsi_full_instruction *inst,
>     LLVMValueRef *pred)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     unsigned index;
>     unsigned char swizzles[4];
>     LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
> @@ -858,11 +784,11 @@ emit_fetch_predicate(
>            * is needlessly causing two comparisons due to storing the
>            intermediate
>            * result as float vector instead of an integer mask
>            vector.
>            */
> -         value = lp_build_compare(bld->base.gallivm,
> -                                  bld->base.type,
> +         value = lp_build_compare(bld->bld_base.base.gallivm,
> +                                  bld->bld_base.base.type,
>                                    PIPE_FUNC_NOTEQUAL,
>                                    value,
> -                                  bld->base.zero);
> +                                  bld->bld_base.base.zero);
>           if (inst->Predicate.Negate) {
>              value = LLVMBuildNot(builder, value, "");
>           }
> @@ -881,15 +807,16 @@ emit_fetch_predicate(
>   * Register store.
>   */
>  static void
> -emit_store(
> -   struct lp_build_tgsi_soa_context *bld,
> +emit_store_chan(
> +   struct lp_build_tgsi_context *bld_base,
>     const struct tgsi_full_instruction *inst,
>     unsigned index,
>     unsigned chan_index,
>     LLVMValueRef pred,
>     LLVMValueRef value)
>  {
> -   struct gallivm_state *gallivm = bld->base.gallivm;
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
>     LLVMBuilderRef builder = gallivm->builder;
>     const struct tgsi_full_dst_register *reg = &inst->Dst[index];
>     struct lp_build_context *uint_bld = &bld->uint_bld;
> @@ -900,13 +827,13 @@ emit_store(
>        break;
>  
>     case TGSI_SAT_ZERO_ONE:
> -      value = lp_build_max(&bld->base, value, bld->base.zero);
> -      value = lp_build_min(&bld->base, value, bld->base.one);
> +      value = lp_build_max(&bld->bld_base.base, value,
> bld->bld_base.base.zero);
> +      value = lp_build_min(&bld->bld_base.base, value,
> bld->bld_base.base.one);
>        break;
>  
>     case TGSI_SAT_MINUS_PLUS_ONE:
> -      value = lp_build_max(&bld->base, value,
> lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
> -      value = lp_build_min(&bld->base, value, bld->base.one);
> +      value = lp_build_max(&bld->bld_base.base, value,
> lp_build_const_vec(bld->bld_base.base.gallivm,
> bld->bld_base.base.type, -1.0));
> +      value = lp_build_min(&bld->bld_base.base, value,
> bld->bld_base.base.one);
>        break;
>  
>     default:
> @@ -919,7 +846,8 @@ emit_store(
>                                            reg->Register.Index,
>                                            &reg->Indirect);
>     } else {
> -      assert(reg->Register.Index <=
> bld->info->file_max[reg->Register.File]);
> +      assert(reg->Register.Index <=
> +
>                             bld->bld_base.info->file_max[reg->Register.File]);
>     }
>  
>     switch( reg->Register.File ) {
> @@ -928,7 +856,7 @@ emit_store(
>           LLVMValueRef chan_vec =
>              lp_build_const_int_vec(gallivm, uint_bld->type,
>              chan_index);
>           LLVMValueRef length_vec =
> -            lp_build_const_int_vec(gallivm, uint_bld->type,
> bld->base.type.length);
> +            lp_build_const_int_vec(gallivm, uint_bld->type,
> bld->bld_base.base.type.length);
>           LLVMValueRef index_vec;  /* indexes into the temp registers
>           */
>           LLVMValueRef outputs_array;
>           LLVMValueRef pixel_offsets;
> @@ -937,7 +865,7 @@ emit_store(
>  
>           /* build pixel offset vector: {0, 1, 2, 3, ...} */
>           pixel_offsets = uint_bld->undef;
> -         for (i = 0; i < bld->base.type.length; i++) {
> +         for (i = 0; i < bld->bld_base.base.type.length; i++) {
>              LLVMValueRef ii = lp_build_const_int32(gallivm, i);
>              pixel_offsets = LLVMBuildInsertElement(builder,
>              pixel_offsets,
>                                                     ii, ii, "");
> @@ -959,7 +887,7 @@ emit_store(
>                             &bld->exec_mask, pred);
>        }
>        else {
> -         LLVMValueRef out_ptr = get_output_ptr(bld,
> reg->Register.Index,
> +         LLVMValueRef out_ptr = lp_get_output_ptr(bld,
> reg->Register.Index,
>                                                 chan_index);
>           lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
>        }
> @@ -971,7 +899,7 @@ emit_store(
>              lp_build_const_int_vec(gallivm, uint_bld->type,
>              chan_index);
>           LLVMValueRef length_vec =
>              lp_build_const_int_vec(gallivm, uint_bld->type,
> -                                   bld->base.type.length);
> +                                   bld->bld_base.base.type.length);
>           LLVMValueRef index_vec;  /* indexes into the temp registers
>           */
>           LLVMValueRef temps_array;
>           LLVMValueRef pixel_offsets;
> @@ -980,7 +908,7 @@ emit_store(
>  
>           /* build pixel offset vector: {0, 1, 2, 3, ...} */
>           pixel_offsets = uint_bld->undef;
> -         for (i = 0; i < bld->base.type.length; i++) {
> +         for (i = 0; i < bld->bld_base.base.type.length; i++) {
>              LLVMValueRef ii = lp_build_const_int32(gallivm, i);
>              pixel_offsets = LLVMBuildInsertElement(builder,
>              pixel_offsets,
>                                                     ii, ii, "");
> @@ -1002,7 +930,7 @@ emit_store(
>                             &bld->exec_mask, pred);
>        }
>        else {
> -         LLVMValueRef temp_ptr = get_temp_ptr(bld,
> reg->Register.Index,
> +         LLVMValueRef temp_ptr = lp_get_temp_ptr_soa(bld,
> reg->Register.Index,
>                                                chan_index);
>           lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
>        }
> @@ -1023,6 +951,27 @@ emit_store(
>     }
>  }
>  
> +static void
> +emit_store(
> +   struct lp_build_tgsi_context * bld_base,
> +   const struct tgsi_full_instruction * inst,
> +   const struct tgsi_opcode_info * info,
> +   LLVMValueRef dst[4])
> +
> +{
> +   unsigned chan_index;
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +
> +   if(info->num_dst) {
> +      LLVMValueRef pred[TGSI_NUM_CHANNELS];
> +
> +      emit_fetch_predicate( bld, inst, pred );
> +
> +      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> +         emit_store_chan(bld_base, inst, 0, chan_index,
> pred[chan_index], dst[chan_index]);
> +      }
> +   }
> +}
>  
>  /**
>   * High-level instruction translators.
> @@ -1034,7 +983,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>            enum lp_build_tex_modifier modifier,
>            LLVMValueRef *texel)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     unsigned unit;
>     LLVMValueRef lod_bias, explicit_lod;
>     LLVMValueRef oow = NULL;
> @@ -1047,7 +996,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>     if (!bld->sampler) {
>        _debug_printf("warning: found texture instruction but no
>        sampler generator supplied\n");
>        for (i = 0; i < 4; i++) {
> -         texel[i] = bld->base.undef;
> +         texel[i] = bld->bld_base.base.undef;
>        }
>        return;
>     }
> @@ -1079,12 +1028,12 @@ emit_tex( struct lp_build_tgsi_soa_context
> *bld,
>     }
>  
>     if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
> -      lod_bias = emit_fetch( bld, inst, 0, 3 );
> +      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
>        explicit_lod = NULL;
>     }
>     else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
>        lod_bias = NULL;
> -      explicit_lod = emit_fetch( bld, inst, 0, 3 );
> +      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3
> );
>     }
>     else {
>        lod_bias = NULL;
> @@ -1092,43 +1041,43 @@ emit_tex( struct lp_build_tgsi_soa_context
> *bld,
>     }
>  
>     if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
> -      oow = emit_fetch( bld, inst, 0, 3 );
> -      oow = lp_build_rcp(&bld->base, oow);
> +      oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
> +      oow = lp_build_rcp(&bld->bld_base.base, oow);
>     }
>  
>     for (i = 0; i < num_coords; i++) {
> -      coords[i] = emit_fetch( bld, inst, 0, i );
> +      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
>        if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
> -         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
> +         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i],
> oow);
>     }
>     for (i = num_coords; i < 3; i++) {
> -      coords[i] = bld->base.undef;
> +      coords[i] = bld->bld_base.base.undef;
>     }
>  
>     if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
> -      LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm,
> 0);
> +      LLVMValueRef index0 =
> lp_build_const_int32(bld->bld_base.base.gallivm, 0);
>        for (i = 0; i < num_coords; i++) {
> -         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
> -         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
> +         LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base,
> inst, 1, i );
> +         LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base,
> inst, 2, i );
>           ddx[i] = LLVMBuildExtractElement(builder, src1, index0,
>           "");
>           ddy[i] = LLVMBuildExtractElement(builder, src2, index0,
>           "");
>        }
>        unit = inst->Src[3].Register.Index;
>     }  else {
>        for (i = 0; i < num_coords; i++) {
> -         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
> -         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
> +         ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base,
> coords[i] );
> +         ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base,
> coords[i] );
>        }
>        unit = inst->Src[1].Register.Index;
>     }
>     for (i = num_coords; i < 3; i++) {
> -      ddx[i] = LLVMGetUndef(bld->base.elem_type);
> -      ddy[i] = LLVMGetUndef(bld->base.elem_type);
> +      ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
> +      ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
>     }
>  
>     bld->sampler->emit_fetch_texel(bld->sampler,
> -                                  bld->base.gallivm,
> -                                  bld->base.type,
> +                                  bld->bld_base.base.gallivm,
> +                                  bld->bld_base.base.type,
>                                    unit, num_coords, coords,
>                                    ddx, ddy,
>                                    lod_bias, explicit_lod,
> @@ -1144,10 +1093,10 @@ near_end_of_shader(struct
> lp_build_tgsi_soa_context *bld,
>     for (i = 0; i < 5; i++) {
>        unsigned opcode;
>  
> -      if (pc + i >= bld->info->num_instructions)
> +      if (pc + i >= bld->bld_base.info->num_instructions)
>  	 return TRUE;
>  
> -      opcode = bld->instructions[pc + i].Instruction.Opcode;
> +      opcode = bld->bld_base.instructions[pc +
> i].Instruction.Opcode;
>  
>        if (opcode == TGSI_OPCODE_END)
>  	 return TRUE;
> @@ -1182,9 +1131,9 @@ emit_kil(
>     const struct tgsi_full_instruction *inst,
>     int pc)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     const struct tgsi_full_src_register *reg = &inst->Src[0];
> -   LLVMValueRef terms[NUM_CHANNELS];
> +   LLVMValueRef terms[TGSI_NUM_CHANNELS];
>     LLVMValueRef mask;
>     unsigned chan_index;
>  
> @@ -1197,10 +1146,10 @@ emit_kil(
>        swizzle = tgsi_util_get_full_src_register_swizzle( reg,
>        chan_index );
>  
>        /* Check if the component has not been already tested. */
> -      assert(swizzle < NUM_CHANNELS);
> +      assert(swizzle < TGSI_NUM_CHANNELS);
>        if( !terms[swizzle] )
>           /* TODO: change the comparison operator instead of setting
>           the sign */
> -         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
> +         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst,
> 0, chan_index );
>     }
>  
>     mask = NULL;
> @@ -1211,7 +1160,7 @@ emit_kil(
>           /*
>            * If term < 0 then mask = 0 else mask = ~0.
>            */
> -         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL,
> terms[chan_index], bld->base.zero);
> +         chan_mask = lp_build_cmp(&bld->bld_base.base,
> PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
>  
>           if(mask)
>              mask = LLVMBuildAnd(builder, mask, chan_mask, "");
> @@ -1237,10 +1186,9 @@ emit_kil(
>   */
>  static void
>  emit_kilp(struct lp_build_tgsi_soa_context *bld,
> -          const struct tgsi_full_instruction *inst,
> -	  int pc)
> +          int pc)
>  {
> -   LLVMBuilderRef builder = bld->base.gallivm->builder;
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     LLVMValueRef mask;
>  
>     /* For those channels which are "alive", disable fragment shader
> @@ -1250,7 +1198,7 @@ emit_kilp(struct lp_build_tgsi_soa_context
> *bld,
>        mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask,
>        "kilp");
>     }
>     else {
> -      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
> +      LLVMValueRef zero =
> LLVMConstNull(bld->bld_base.base.int_vec_type);
>        mask = zero;
>     }
>  
> @@ -1268,7 +1216,7 @@ emit_kilp(struct lp_build_tgsi_soa_context
> *bld,
>  static void
>  emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
>  {
> -   struct gallivm_state *gallivm = bld->base.gallivm;
> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
>     LLVMBuilderRef builder = gallivm->builder;
>     LLVMValueRef temp_ptr;
>     LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
> @@ -1276,7 +1224,7 @@ emit_dump_temps(struct
> lp_build_tgsi_soa_context *bld)
>     LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
>     LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
>     int index;
> -   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
> +   int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
>  
>     for (index = 0; index < n; index++) {
>        LLVMValueRef idx = lp_build_const_int32(gallivm, index);
> @@ -1286,7 +1234,7 @@ emit_dump_temps(struct
> lp_build_tgsi_soa_context *bld)
>        lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
>  
>        for (chan = 0; chan < 4; chan++) {
> -         temp_ptr = get_temp_ptr(bld, index, chan);
> +         temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
>           res = LLVMBuildLoad(builder, temp_ptr, "");
>           v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
>           v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
> @@ -1307,31 +1255,32 @@ emit_dump_temps(struct
> lp_build_tgsi_soa_context *bld)
>  
>  
>  
> -static void
> -emit_declaration(
> -   struct lp_build_tgsi_soa_context *bld,
> +void
> +lp_emit_declaration_soa(
> +   struct lp_build_tgsi_context *bld_base,
>     const struct tgsi_full_declaration *decl)
>  {
> -   struct gallivm_state *gallivm = bld->base.gallivm;
> -   LLVMTypeRef vec_type = bld->base.vec_type;
> +   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
> +   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> +   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
>     const unsigned first = decl->Range.First;
>     const unsigned last = decl->Range.Last;
>     unsigned idx, i;
>  
>     for (idx = first; idx <= last; ++idx) {
> -      assert(last <= bld->info->file_max[decl->Declaration.File]);
> +      assert(last <=
> bld->bld_base.info->file_max[decl->Declaration.File]);
>        switch (decl->Declaration.File) {
>        case TGSI_FILE_TEMPORARY:
>           assert(idx < LP_MAX_TGSI_TEMPS);
>           if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
> -            for (i = 0; i < NUM_CHANNELS; i++)
> +            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
>                 bld->temps[idx][i] = lp_build_alloca(gallivm,
>                 vec_type, "temp");
>           }
>           break;
>  
>        case TGSI_FILE_OUTPUT:
>           if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
> -            for (i = 0; i < NUM_CHANNELS; i++)
> +            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
>                 bld->outputs[idx][i] = lp_build_alloca(gallivm,
>                                                        vec_type,
>                                                        "output");
>           }
> @@ -1339,13 +1288,13 @@ emit_declaration(
>  
>        case TGSI_FILE_ADDRESS:
>           assert(idx < LP_MAX_TGSI_ADDRS);
> -         for (i = 0; i < NUM_CHANNELS; i++)
> +         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
>              bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type,
>              "addr");
>           break;
>  
>        case TGSI_FILE_PREDICATE:
>           assert(idx < LP_MAX_TGSI_PREDS);
> -         for (i = 0; i < NUM_CHANNELS; i++)
> +         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
>              bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
>                                                   "predicate");
>           break;
> @@ -1358,965 +1307,427 @@ emit_declaration(
>  }
>  
>  
> -/**
> - * Emit LLVM for one TGSI instruction.
> - * \param return TRUE for success, FALSE otherwise
> - */
> -static boolean
> -emit_instruction(
> -   struct lp_build_tgsi_soa_context *bld,
> -   const struct tgsi_full_instruction *inst,
> -   const struct tgsi_opcode_info *info,
> -   int *pc)
> +void lp_emit_immediate_soa(
> +   struct lp_build_tgsi_context *bld_base,
> +   const struct tgsi_full_immediate *imm)
>  {
> -   unsigned chan_index;
> -   LLVMValueRef src0, src1, src2;
> -   LLVMValueRef tmp0, tmp1, tmp2;
> -   LLVMValueRef tmp3 = NULL;
> -   LLVMValueRef tmp4 = NULL;
> -   LLVMValueRef tmp5 = NULL;
> -   LLVMValueRef tmp6 = NULL;
> -   LLVMValueRef tmp7 = NULL;
> -   LLVMValueRef res;
> -   LLVMValueRef dst0[NUM_CHANNELS];
> +   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
> +   struct gallivm_state * gallivm = bld_base->base.gallivm;
>  
> -   /*
> -    * Stores and write masks are handled in a general fashion after
> the long
> -    * instruction opcode switch statement.
> -    *
> -    * Although not stricitly necessary, we avoid generating
> instructions for
> -    * channels which won't be stored, in cases where's that easy.
> For some
> -    * complex instructions, like texture sampling, it is more
> convenient to
> -    * assume a full writemask and then let LLVM optimization passes
> eliminate
> -    * redundant code.
> -    */
> +   /* simply copy the immediate values into the next immediates[]
> slot */
> +   unsigned i;
> +   const uint size = imm->Immediate.NrTokens - 1;
> +   assert(size <= 4);
> +   assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
>  
> -   (*pc)++;
> +   for( i = 0; i < size; ++i )
> +      bld->immediates[bld->num_immediates][i] =
> +              lp_build_const_vec(gallivm, bld_base->base.type,
> imm->u[i].Float);
>  
> -   assert(info->num_dst <= 1);
> -   if (info->num_dst) {
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = bld->base.undef;
> -      }
> -   }
> +   for( i = size; i < 4; ++i )
> +      bld->immediates[bld->num_immediates][i] =
> bld_base->base.undef;
>  
> -   switch (inst->Instruction.Opcode) {
> -   case TGSI_OPCODE_ARL:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         tmp0 = lp_build_floor(&bld->base, tmp0);
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +   bld->num_immediates++;
> +}
>  
> -   case TGSI_OPCODE_MOV:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
> -      }
> -      break;
> +static void
> +ddx_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_LIT:
> -      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ) {
> -         dst0[TGSI_CHAN_X] = bld->base.one;
> -      }
> -      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
> -         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -         dst0[TGSI_CHAN_Y] = lp_build_max( &bld->base, src0,
> bld->base.zero);
> -      }
> -      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
> -         /* XMM[1] = SrcReg[0].yyyy */
> -         tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> -         /* XMM[1] = max(XMM[1], 0) */
> -         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
> -         /* XMM[2] = SrcReg[0].wwww */
> -         tmp2 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
> -         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
> -         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0,
> bld->base.zero);
> -         dst0[TGSI_CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1,
> bld->base.zero);
> -      }
> -      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) ) {
> -         dst0[TGSI_CHAN_W] = bld->base.one;
> -      }
> -      break;
> +   emit_fetch_deriv(bld, emit_data->args[0], NULL,
> +                    &emit_data->output[emit_data->chan], NULL);
> +}
>  
> -   case TGSI_OPCODE_RCP:
> -   /* TGSI_OPCODE_RECIP */
> -      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      res = lp_build_rcp(&bld->base, src0);
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = res;
> -      }
> -      break;
> +static void
> +ddy_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_RSQ:
> -   /* TGSI_OPCODE_RECIPSQRT */
> -      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      src0 = lp_build_abs(&bld->base, src0);
> -      res = lp_build_rsqrt(&bld->base, src0);
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = res;
> -      }
> -      break;
> +   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
> +                    &emit_data->output[emit_data->chan]);
> +}
>  
> -   case TGSI_OPCODE_EXP:
> -      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
> -         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
> -         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
> -         LLVMValueRef *p_exp2_int_part = NULL;
> -         LLVMValueRef *p_frac_part = NULL;
> -         LLVMValueRef *p_exp2 = NULL;
> -
> -         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
> -            p_exp2_int_part = &tmp0;
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
> -            p_frac_part = &tmp1;
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
> -            p_exp2 = &tmp2;
> -
> -         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part,
> p_frac_part, p_exp2);
> -
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
> -            dst0[TGSI_CHAN_X] = tmp0;
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
> -            dst0[TGSI_CHAN_Y] = tmp1;
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
> -            dst0[TGSI_CHAN_Z] = tmp2;
> -      }
> -      /* dst.w = 1.0 */
> -      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
> -         dst0[TGSI_CHAN_W] = bld->base.one;
> -      }
> -      break;
> +static void
> +kilp_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_LOG:
> -      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
> -         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
> -         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
> -         LLVMValueRef *p_floor_log2 = NULL;
> -         LLVMValueRef *p_exp = NULL;
> -         LLVMValueRef *p_log2 = NULL;
> -
> -         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -         src0 = lp_build_abs( &bld->base, src0 );
> -
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
> -            p_floor_log2 = &tmp0;
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
> -            p_exp = &tmp1;
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
> -            p_log2 = &tmp2;
> -
> -         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2,
> p_log2);
> -
> -         /* dst.x = floor(lg2(abs(src.x))) */
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
> -            dst0[TGSI_CHAN_X] = tmp0;
> -         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) {
> -            dst0[TGSI_CHAN_Y] = lp_build_div( &bld->base, src0,
> tmp1);
> -         }
> -         /* dst.z = lg2(abs(src.x)) */
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
> -            dst0[TGSI_CHAN_Z] = tmp2;
> -      }
> -      /* dst.w = 1.0 */
> -      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
> -         dst0[TGSI_CHAN_W] = bld->base.one;
> -      }
> -      break;
> +   emit_kilp(bld, bld_base->pc - 1);
> +}
>  
> -   case TGSI_OPCODE_MUL:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
> -      }
> -      break;
> +static void
> +kil_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_ADD:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
> -      }
> -      break;
> +   emit_kil(bld, emit_data->inst, bld_base->pc - 1);
> +}
>  
> -   case TGSI_OPCODE_DP3:
> -   /* TGSI_OPCODE_DOT3 */
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> -      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +static void
> +tex_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_DP4:
> -   /* TGSI_OPCODE_DOT4 */
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> -      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
> emit_data->output);
> +}
>  
> -   case TGSI_OPCODE_DST:
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
> -         dst0[TGSI_CHAN_X] = bld->base.one;
> -      }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
> -         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> -         tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> -         dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
> -      }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
> -         dst0[TGSI_CHAN_Z] = emit_fetch( bld, inst, 0, TGSI_CHAN_Z
> );
> -      }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
> -         dst0[TGSI_CHAN_W] = emit_fetch( bld, inst, 1, TGSI_CHAN_W
> );
> -      }
> -      break;
> +static void
> +txb_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_MIN:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
> -      }
> -      break;
> +   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
> +            emit_data->output);
> +}
>  
> -   case TGSI_OPCODE_MAX:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
> -      }
> -      break;
> +static void
> +txd_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_SLT:
> -   /* TGSI_OPCODE_SETLT */
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1
> );
> -         dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> -      }
> -      break;
> +   emit_tex(bld, emit_data->inst,
> LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
> +            emit_data->output);
> +}
>  
> -   case TGSI_OPCODE_SGE:
> -   /* TGSI_OPCODE_SETGE */
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0,
> src1 );
> -         dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> -      }
> -      break;
> +static void
> +txl_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_MAD:
> -   /* TGSI_OPCODE_MADD */
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         tmp1 = emit_fetch( bld, inst, 1, chan_index );
> -         tmp2 = emit_fetch( bld, inst, 2, chan_index );
> -         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
> -         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
> +            emit_data->output);
> +}
>  
> -   case TGSI_OPCODE_SUB:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         tmp1 = emit_fetch( bld, inst, 1, chan_index );
> -         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
> -      }
> -      break;
> +static void
> +txp_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_LRP:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         src2 = emit_fetch( bld, inst, 2, chan_index );
> -         tmp0 = lp_build_sub( &bld->base, src1, src2 );
> -         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
> -         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
> -      }
> -      break;
> +   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
> +            emit_data->output);
> +}
>  
> -   case TGSI_OPCODE_CND:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         src2 = emit_fetch( bld, inst, 2, chan_index );
> -         tmp1 = lp_build_const_vec(bld->base.gallivm,
> bld->base.type, 0.5);
> -         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2,
> tmp1);
> -         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0,
> src1 );
> -      }
> -      break;
> -
> -   case TGSI_OPCODE_DP2A:
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );  /* xmm0 =
> src[0].x */
> -      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );  /* xmm1 =
> src[1].x */
> -      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /*
> xmm0 = xmm0 * xmm1 */
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );  /* xmm1 =
> src[0].y */
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );  /* xmm2 =
> src[1].y */
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /*
> xmm1 = xmm1 * xmm2 */
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /*
> xmm0 = xmm0 + xmm1 */
> -      tmp1 = emit_fetch( bld, inst, 2, TGSI_CHAN_X );  /* xmm1 =
> src[2].x */
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /*
> xmm0 = xmm0 + xmm1 */
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
> -      }
> -      break;
> +static void
> +cal_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_FRC:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         tmp0 = lp_build_floor(&bld->base, src0);
> -         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
> +                     &bld_base->pc);
> +}
>  
> -   case TGSI_OPCODE_CLAMP:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         src2 = emit_fetch( bld, inst, 2, chan_index );
> -         tmp0 = lp_build_max(&bld->base, tmp0, src1);
> -         tmp0 = lp_build_min(&bld->base, tmp0, src2);
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +static void
> +ret_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_FLR:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
> -      }
> -      break;
> +   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
> +}
>  
> -   case TGSI_OPCODE_ROUND:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
> -      }
> -      break;
> +static void
> +brk_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_EX2: {
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      tmp0 = lp_build_exp2( &bld->base, tmp0);
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> -   }
> +   lp_exec_break(&bld->exec_mask);
> +}
>  
> -   case TGSI_OPCODE_LG2:
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      tmp0 = lp_build_log2( &bld->base, tmp0);
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +static void
> +if_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp;
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_POW:
> -      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      src1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> -      res = lp_build_pow( &bld->base, src0, src1 );
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = res;
> -      }
> -      break;
> +   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
> +                      emit_data->args[0], bld->bld_base.base.zero);
> +   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
> +}
>  
> -   case TGSI_OPCODE_XPD:
> -      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
> -         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
> -         tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
> -         tmp3 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
> -      }
> -      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
> -         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
> -         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> -         tmp4 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> -      }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
> -         tmp2 = tmp0;
> -         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
> -         tmp5 = tmp3;
> -         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
> -         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
> -         dst0[TGSI_CHAN_X] = tmp2;
> -      }
> -      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
> -         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
> -         tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> -         tmp5 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
> -         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
> -         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
> -         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
> -         dst0[TGSI_CHAN_Y] = tmp3;
> -      }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
> -         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
> -         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
> -         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
> -         dst0[TGSI_CHAN_Z] = tmp5;
> -      }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
> -         dst0[TGSI_CHAN_W] = bld->base.one;
> -      }
> -      break;
> +static void
> +bgnloop_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_ABS:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
> -      }
> -      break;
> +   lp_exec_bgnloop(&bld->exec_mask);
> +}
>  
> -   case TGSI_OPCODE_RCC:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -
> -   case TGSI_OPCODE_DPH:
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
> -      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +static void
> +bgnsub_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_COS:
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      tmp0 = lp_build_cos( &bld->base, tmp0 );
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +   lp_exec_mask_bgnsub(&bld->exec_mask);
> +}
>  
> -   case TGSI_OPCODE_DDX:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         emit_fetch_deriv( bld, inst, 0, chan_index, NULL,
> &dst0[chan_index], NULL);
> -      }
> -      break;
> +static void
> +else_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_DDY:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL,
> &dst0[chan_index]);
> -      }
> -      break;
> +   lp_exec_mask_cond_invert(&bld->exec_mask);
> +}
>  
> -   case TGSI_OPCODE_KILP:
> -      /* predicated kill */
> -      emit_kilp( bld, inst, (*pc)-1 );
> -      break;
> +static void
> +endif_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_KIL:
> -      /* conditional kill */
> -      emit_kil( bld, inst, (*pc)-1 );
> -      break;
> +   lp_exec_mask_cond_pop(&bld->exec_mask);
> +}
>  
> -   case TGSI_OPCODE_PK2H:
> -      return FALSE;
> -      break;
> +static void
> +endloop_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_PK2US:
> -      return FALSE;
> -      break;
> +   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
> +}
>  
> -   case TGSI_OPCODE_PK4B:
> -      return FALSE;
> -      break;
> +static void
> +endsub_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_PK4UB:
> -      return FALSE;
> -      break;
> +   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
> +}
>  
> -   case TGSI_OPCODE_RFL:
> -      return FALSE;
> -      break;
> +static void
> +cont_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_SEQ:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0,
> src1 );
> -         dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> -      }
> -      break;
> +   lp_exec_continue(&bld->exec_mask);
> +}
>  
> -   case TGSI_OPCODE_SFL:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = bld->base.zero;
> -      }
> -      break;
> +/* XXX: Refactor and move it to lp_bld_action.c
> + *
> + * XXX: What do the comments about xmm registers mean?  Maybe they
> are left over
> + * from old code, but there is no guarantee that LLVM will use those
> registers
> + * for this code.
> + *
> + * XXX: There should be no calls to lp_build_emit_fetch in this
> function.  This
> + * should be handled by the emit_data->fetch_args function. */
> +static void
> +nrm_emit(
> +   const struct lp_build_opcode_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMValueRef tmp0, tmp1;
> +   LLVMValueRef tmp4 = NULL;
> +   LLVMValueRef tmp5 = NULL;
> +   LLVMValueRef tmp6 = NULL;
> +   LLVMValueRef tmp7 = NULL;
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   case TGSI_OPCODE_SGT:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0,
> src1 );
> -         dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> -      }
> -      break;
> +   uint dims = (emit_data->inst->Instruction.Opcode ==
> TGSI_OPCODE_NRM) ? 3 : 4;
>  
> -   case TGSI_OPCODE_SIN:
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -      tmp0 = lp_build_sin( &bld->base, tmp0 );
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;
> -      }
> -      break;
> +  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
> +      TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
> +      TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
> +      (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) &&
> dims == 4)) {
>  
> -   case TGSI_OPCODE_SLE:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0,
> src1 );
> -         dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> -      }
> -      break;
> +      /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt()
> above). */
>  
> -   case TGSI_OPCODE_SNE:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0,
> src1 );
> -         dst0[chan_index] = lp_build_select( &bld->base, tmp0,
> bld->base.one, bld->base.zero );
> +      /* xmm4 = src.x */
> +      /* xmm0 = src.x * src.x */
> +      tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0,
> TGSI_CHAN_X);
> +      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_X)) {
> +         tmp4 = tmp0;
>        }
> -      break;
> +      tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
>  
> -   case TGSI_OPCODE_STR:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = bld->base.one;
> +      /* xmm5 = src.y */
> +      /* xmm0 = xmm0 + src.y * src.y */
> +      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0,
> TGSI_CHAN_Y);
> +      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_Y)) {
> +         tmp5 = tmp1;
>        }
> -      break;
> -
> -   case TGSI_OPCODE_TEX:
> -      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
> -      break;
> -
> -   case TGSI_OPCODE_TXD:
> -      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0
> );
> -      break;
> -
> -   case TGSI_OPCODE_UP2H:
> -      /* deprecated */
> -      assert (0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_UP2US:
> -      /* deprecated */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_UP4B:
> -      /* deprecated */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_UP4UB:
> -      /* deprecated */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_X2D:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_ARA:
> -      /* deprecated */
> -      assert(0);
> -      return FALSE;
> -      break;
> +      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
> +      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
>  
> -   case TGSI_OPCODE_ARR:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         tmp0 = lp_build_round(&bld->base, tmp0);
> -         dst0[chan_index] = tmp0;
> +      /* xmm6 = src.z */
> +      /* xmm0 = xmm0 + src.z * src.z */
> +      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0,
> TGSI_CHAN_Z);
> +      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_Z)) {
> +         tmp6 = tmp1;
>        }
> -      break;
> -
> -   case TGSI_OPCODE_BRA:
> -      /* deprecated */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_CAL:
> -      lp_exec_mask_call(&bld->exec_mask,
> -                        inst->Label.Label,
> -                        pc);
> +      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
> +      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
>  
> -      break;
> -
> -   case TGSI_OPCODE_RET:
> -      lp_exec_mask_ret(&bld->exec_mask, pc);
> -      break;
> -
> -   case TGSI_OPCODE_END:
> -      if (0) {
> -         /* for debugging */
> -         emit_dump_temps(bld);
> +      if (dims == 4) {
> +         /* xmm7 = src.w */
> +         /* xmm0 = xmm0 + src.w * src.w */
> +         tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst,
> 0, TGSI_CHAN_W);
> +         if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_W)) {
> +            tmp7 = tmp1;
> +         }
> +         tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
> +         tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
>        }
> -      *pc = -1;
> -      break;
> -
> -   case TGSI_OPCODE_SSG:
> -   /* TGSI_OPCODE_SGN */
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
> +      /* xmm1 = 1 / sqrt(xmm0) */
> +      tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
> +       /* dst.x = xmm1 * src.x */
> +      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_X)) {
> +         emit_data->output[TGSI_CHAN_X] = lp_build_mul(
> &bld->bld_base.base, tmp4, tmp1);
>        }
> -      break;
> -
> -   case TGSI_OPCODE_CMP:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         src0 = emit_fetch( bld, inst, 0, chan_index );
> -         src1 = emit_fetch( bld, inst, 1, chan_index );
> -         src2 = emit_fetch( bld, inst, 2, chan_index );
> -         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0,
> bld->base.zero );
> -         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1,
> src2);
> +      /* dst.y = xmm1 * src.y */
> +      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_Y)) {
> +         emit_data->output[TGSI_CHAN_Y] = lp_build_mul(
> &bld->bld_base.base, tmp5, tmp1);
>        }
> -      break;
>  
> -   case TGSI_OPCODE_SCS:
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
> -         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -         dst0[TGSI_CHAN_X] = lp_build_cos( &bld->base, tmp0 );
> -      }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
> -         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
> -         dst0[TGSI_CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
> +      /* dst.z = xmm1 * src.z */
> +      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst,
> TGSI_CHAN_Z)) {
> +         emit_data->output[TGSI_CHAN_Z] = lp_build_mul(
> &bld->bld_base.base, tmp6, tmp1);
>        }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
> -         dst0[TGSI_CHAN_Z] = bld->base.zero;
> +      /* dst.w = xmm1 * src.w */
> +      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)
> && dims == 4) {
> +         emit_data->output[TGSI_CHAN_W] = lp_build_mul(
> &bld->bld_base.base, tmp7, tmp1);
>        }
> -      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
> -         dst0[TGSI_CHAN_W] = bld->base.one;
> -      }
> -      break;
> -
> -   case TGSI_OPCODE_TXB:
> -      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
> -      break;
> -
> -   case TGSI_OPCODE_NRM:
> -      /* fall-through */
> -   case TGSI_OPCODE_NRM4:
> -      /* 3 or 4-component normalization */
> -      {
> -         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ?
> 3 : 4;
> -
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) ||
> -            TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) ||
> -            TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) ||
> -             (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) &&
> dims == 4)) {
> -
> -            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt()
> above). */
> -
> -            /* xmm4 = src.x */
> -            /* xmm0 = src.x * src.x */
> -            tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
> -            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
> -               tmp4 = tmp0;
> -            }
> -            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
> -
> -            /* xmm5 = src.y */
> -            /* xmm0 = xmm0 + src.y * src.y */
> -            tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Y);
> -            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
> -               tmp5 = tmp1;
> -            }
> -            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
> -            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -
> -            /* xmm6 = src.z */
> -            /* xmm0 = xmm0 + src.z * src.z */
> -            tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Z);
> -            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
> -               tmp6 = tmp1;
> -            }
> -            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
> -            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -
> -            if (dims == 4) {
> -               /* xmm7 = src.w */
> -               /* xmm0 = xmm0 + src.w * src.w */
> -               tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_W);
> -               if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W))
> {
> -                  tmp7 = tmp1;
> -               }
> -               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
> -               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
> -            }
> -
> -            /* xmm1 = 1 / sqrt(xmm0) */
> -            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
> -
> -            /* dst.x = xmm1 * src.x */
> -            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
> -               dst0[TGSI_CHAN_X] = lp_build_mul( &bld->base, tmp4,
> tmp1);
> -            }
> -
> -            /* dst.y = xmm1 * src.y */
> -            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
> -               dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp5,
> tmp1);
> -            }
> -
> -            /* dst.z = xmm1 * src.z */
> -            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
> -               dst0[TGSI_CHAN_Z] = lp_build_mul( &bld->base, tmp6,
> tmp1);
> -            }
> -
> -            /* dst.w = xmm1 * src.w */
> -            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) &&
> dims == 4) {
> -               dst0[TGSI_CHAN_W] = lp_build_mul( &bld->base, tmp7,
> tmp1);
> -            }
> -         }
> -
> -         /* dst.w = 1.0 */
> -         if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims
> == 3) {
> -            dst0[TGSI_CHAN_W] = bld->base.one;
> -         }
> -      }
> -      break;
> -
> -   case TGSI_OPCODE_DIV:
> -      /* deprecated */
> -      assert( 0 );
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_DP2:
> -      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );  /* xmm0 =
> src[0].x */
> -      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );  /* xmm1 =
> src[1].x */
> -      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /*
> xmm0 = xmm0 * xmm1 */
> -      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );  /* xmm1 =
> src[0].y */
> -      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );  /* xmm2 =
> src[1].y */
> -      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /*
> xmm1 = xmm1 * xmm2 */
> -      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /*
> xmm0 = xmm0 + xmm1 */
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
> -      }
> -      break;
> -
> -   case TGSI_OPCODE_TXL:
> -      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
> -      break;
> -
> -   case TGSI_OPCODE_TXP:
> -      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
> -      break;
> -
> -   case TGSI_OPCODE_BRK:
> -      lp_exec_break(&bld->exec_mask);
> -      break;
> -
> -   case TGSI_OPCODE_IF:
> -      tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
> -      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
> -                          tmp0, bld->base.zero);
> -      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
> -      break;
> -
> -   case TGSI_OPCODE_BGNLOOP:
> -      lp_exec_bgnloop(&bld->exec_mask);
> -      break;
> -
> -   case TGSI_OPCODE_BGNSUB:
> -      lp_exec_mask_bgnsub(&bld->exec_mask);
> -      break;
> -
> -   case TGSI_OPCODE_ELSE:
> -      lp_exec_mask_cond_invert(&bld->exec_mask);
> -      break;
> -
> -   case TGSI_OPCODE_ENDIF:
> -      lp_exec_mask_cond_pop(&bld->exec_mask);
> -      break;
> -
> -   case TGSI_OPCODE_ENDLOOP:
> -      lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
> -      break;
> +   }
>  
> -   case TGSI_OPCODE_ENDSUB:
> -      lp_exec_mask_endsub(&bld->exec_mask, pc);
> -      break;
> +   /* dst.w = 1.0 */
> +   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) &&
> dims == 3) {
> +       emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
> +   }
> +}
>  
> -   case TGSI_OPCODE_PUSHA:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> +static void emit_prologue(struct lp_build_tgsi_context * bld_base)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
> +   struct gallivm_state * gallivm = bld_base->base.gallivm;
>  
> -   case TGSI_OPCODE_POPA:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> +   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> +      LLVMValueRef array_size =
> +         lp_build_const_int32(gallivm,
> +
>                         bld_base->info->file_max[TGSI_FILE_TEMPORARY]
> * 4 + 4);
> +      bld->temps_array = lp_build_array_alloca(gallivm,
> +
>                                              bld_base->base.vec_type,
> array_size,
> +                                              "temp_array");
> +   }
>  
> -   case TGSI_OPCODE_CEIL:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
> -      }
> -      break;
> +   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> +      LLVMValueRef array_size =
> +         lp_build_const_int32(gallivm,
> +
>                            bld_base->info->file_max[TGSI_FILE_OUTPUT]
> * 4 + 4);
> +      bld->outputs_array = lp_build_array_alloca(gallivm,
> +
>                                                bld_base->base.vec_type,
> array_size,
> +                                                "output_array");
> +   }
>  
> -   case TGSI_OPCODE_I2F:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> +   /* If we have indirect addressing in inputs we need to copy them
> into
> +    * our alloca array to be able to iterate over them */
> +   if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
> +      unsigned index, chan;
> +      LLVMTypeRef vec_type = bld_base->base.vec_type;
> +      LLVMValueRef array_size = lp_build_const_int32(gallivm,
> +            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
> +      bld->inputs_array = lp_build_array_alloca(gallivm,
> +                                               vec_type, array_size,
> +                                               "input_array");
>  
> -   case TGSI_OPCODE_NOT:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> +      assert(bld_base->info->num_inputs
> +                        <= bld_base->info->file_max[TGSI_FILE_INPUT]
> + 1);
>  
> -   case TGSI_OPCODE_TRUNC:
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         tmp0 = emit_fetch( bld, inst, 0, chan_index );
> -         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
> +      for (index = 0; index < bld_base->info->num_inputs; ++index) {
> +         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
> +            LLVMValueRef lindex =
> +               lp_build_const_int32(gallivm, index * 4 + chan);
> +            LLVMValueRef input_ptr =
> +               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
> +                            &lindex, 1, "");
> +            LLVMValueRef value = bld->inputs[index][chan];
> +            if (value)
> +               LLVMBuildStore(gallivm->builder, value, input_ptr);
> +         }
>        }
> -      break;
> -
> -   case TGSI_OPCODE_SHL:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_ISHR:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_AND:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_OR:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_MOD:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_XOR:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_SAD:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_TXF:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_TXQ:
> -      /* deprecated? */
> -      assert(0);
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_CONT:
> -      lp_exec_continue(&bld->exec_mask);
> -      break;
> -
> -   case TGSI_OPCODE_EMIT:
> -      return FALSE;
> -      break;
> -
> -   case TGSI_OPCODE_ENDPRIM:
> -      return FALSE;
> -      break;
> +   }
> +}
>  
> -   case TGSI_OPCODE_NOP:
> -      break;
> +static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
> +{
> +   struct lp_build_tgsi_soa_context * bld =
> lp_soa_context(bld_base);
>  
> -   default:
> -      return FALSE;
> +   if (0) {
> +      /* for debugging */
> +      emit_dump_temps(bld);
>     }
> -
> -   if(info->num_dst) {
> -      LLVMValueRef pred[NUM_CHANNELS];
>  
> -      emit_fetch_predicate( bld, inst, pred );
> -
> -      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
> -         emit_store( bld, inst, 0, chan_index, pred[chan_index],
> dst0[chan_index]);
> +   /* If we have indirect addressing in outputs we need to copy our
> alloca array
> +    * to the output slots specified by the caller */
> +   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> +      unsigned index, chan;
> +      assert(bld_base->info->num_outputs <=
> +                        bld_base->info->file_max[TGSI_FILE_OUTPUT] +
> 1);
> +      for (index = 0; index < bld_base->info->num_outputs; ++index)
> {
> +         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
> +            bld->outputs[index][chan] = lp_get_output_ptr(bld,
> index, chan);
> +         }
>        }
>     }
> -
> -   return TRUE;
>  }
>  
> -
>  void
>  lp_build_tgsi_soa(struct gallivm_state *gallivm,
>                    const struct tgsi_token *tokens,
> @@ -2325,17 +1736,12 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
>                    LLVMValueRef consts_ptr,
>                    LLVMValueRef system_values_array,
>                    const LLVMValueRef *pos,
> -                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
> -                  LLVMValueRef (*outputs)[NUM_CHANNELS],
> +                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
> +                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
>                    struct lp_build_sampler_soa *sampler,
>                    const struct tgsi_shader_info *info)
>  {
>     struct lp_build_tgsi_soa_context bld;
> -   struct tgsi_parse_context parse;
> -   uint num_immediates = 0;
> -   uint num_instructions = 0;
> -   unsigned i;
> -   int pc = 0;
>  
>     struct lp_type res_type;
>  
> @@ -2347,7 +1753,7 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
>  
>     /* Setup build context */
>     memset(&bld, 0, sizeof bld);
> -   lp_build_context_init(&bld.base, gallivm, type);
> +   lp_build_context_init(&bld.bld_base.base, gallivm, type);
>     lp_build_context_init(&bld.uint_bld, gallivm,
>     lp_uint_type(type));
>     lp_build_context_init(&bld.elem_bld, gallivm,
>     lp_elem_type(type));
>     bld.mask = mask;
> @@ -2356,145 +1762,55 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
>     bld.outputs = outputs;
>     bld.consts_ptr = consts_ptr;
>     bld.sampler = sampler;
> -   bld.info = info;
> +   bld.bld_base.info = info;
>     bld.indirect_files = info->indirect_files;
> -   bld.instructions = (struct tgsi_full_instruction *)
> -                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct
> tgsi_full_instruction) );
> -   bld.max_instructions = LP_MAX_INSTRUCTIONS;
>  
> -   if (!bld.instructions) {
> -      return;
> -   }
> +   bld.bld_base.soa = TRUE;
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] =
> emit_fetch_constant;
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] =
> emit_fetch_immediate;
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] =
> emit_fetch_input;
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] =
> emit_fetch_temporary;
> +   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] =
> emit_fetch_system_value;
> +   bld.bld_base.emit_store = emit_store;
> +
> +   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
> +   bld.bld_base.emit_immediate = lp_emit_immediate_soa;
> +
> +   bld.bld_base.emit_prologue = emit_prologue;
> +   bld.bld_base.emit_epilogue = emit_epilogue;
> +
> +   /* Set opcode actions */
> +   lp_set_default_actions_cpu(&bld.bld_base);
> +
> +   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
> +   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
> +
> +   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
>  
> -   lp_exec_mask_init(&bld.exec_mask, &bld.base);
> -
> -   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> -      LLVMValueRef array_size =
> -         lp_build_const_int32(gallivm,
> -                              info->file_max[TGSI_FILE_TEMPORARY] *
> 4 + 4);
> -      bld.temps_array = lp_build_array_alloca(gallivm,
> -                                              bld.base.vec_type,
> array_size,
> -                                              "temp_array");
> -   }
> -
> -   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> -      LLVMValueRef array_size =
> -         lp_build_const_int32(gallivm,
> -                              info->file_max[TGSI_FILE_OUTPUT] * 4 +
> 4);
> -      bld.outputs_array = lp_build_array_alloca(gallivm,
> -                                                bld.base.vec_type,
> array_size,
> -                                                "output_array");
> -   }
> -
> -   /* If we have indirect addressing in inputs we need to copy them
> into
> -    * our alloca array to be able to iterate over them */
> -   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
> -      unsigned index, chan;
> -      LLVMTypeRef vec_type = bld.base.vec_type;
> -      LLVMValueRef array_size =
> -         lp_build_const_int32(gallivm,
> info->file_max[TGSI_FILE_INPUT]*4 + 4);
> -      bld.inputs_array = lp_build_array_alloca(gallivm,
> -                                               vec_type, array_size,
> -                                               "input_array");
> -
> -      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] +
> 1);
> -
> -      for (index = 0; index < info->num_inputs; ++index) {
> -         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
> -            LLVMValueRef lindex =
> -               lp_build_const_int32(gallivm, index * 4 + chan);
> -            LLVMValueRef input_ptr =
> -               LLVMBuildGEP(gallivm->builder, bld.inputs_array,
> -                            &lindex, 1, "");
> -            LLVMValueRef value = bld.inputs[index][chan];
> -            if (value)
> -               LLVMBuildStore(gallivm->builder, value, input_ptr);
> -         }
> -      }
> -   }
>  
>     bld.system_values_array = system_values_array;
>  
> -   tgsi_parse_init( &parse, tokens );
> -
> -   while( !tgsi_parse_end_of_tokens( &parse ) ) {
> -      tgsi_parse_token( &parse );
> -
> -      switch( parse.FullToken.Token.Type ) {
> -      case TGSI_TOKEN_TYPE_DECLARATION:
> -         /* Inputs already interpolated */
> -         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
> -         break;
> -
> -      case TGSI_TOKEN_TYPE_INSTRUCTION:
> -         {
> -            /* save expanded instruction */
> -            if (num_instructions == bld.max_instructions) {
> -               struct tgsi_full_instruction *instructions;
> -               instructions = REALLOC(bld.instructions,
> -                                      bld.max_instructions
> -                                      * sizeof(struct
> tgsi_full_instruction),
> -                                      (bld.max_instructions +
> LP_MAX_INSTRUCTIONS)
> -                                      * sizeof(struct
> tgsi_full_instruction));
> -               if (!instructions) {
> -                  break;
> -               }
> -               bld.instructions = instructions;
> -               bld.max_instructions += LP_MAX_INSTRUCTIONS;
> -            }
> -
> -            memcpy(bld.instructions + num_instructions,
> -                   &parse.FullToken.FullInstruction,
> -                   sizeof(bld.instructions[0]));
> -
> -            num_instructions++;
> -         }
> -
> -         break;
> -
> -      case TGSI_TOKEN_TYPE_IMMEDIATE:
> -         /* simply copy the immediate values into the next
> immediates[] slot */
> -         {
> -            const uint size =
> parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
> -            assert(size <= 4);
> -            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
> -            for( i = 0; i < size; ++i )
> -               bld.immediates[num_immediates][i] =
> -                  lp_build_const_vec(gallivm, type,
> parse.FullToken.FullImmediate.u[i].Float);
> -            for( i = size; i < 4; ++i )
> -               bld.immediates[num_immediates][i] = bld.base.undef;
> -            num_immediates++;
> -         }
> -         break;
> -
> -      case TGSI_TOKEN_TYPE_PROPERTY:
> -         break;
> -
> -      default:
> -         assert( 0 );
> -      }
> -   }
> -
> -   while (pc != -1) {
> -      struct tgsi_full_instruction *instr = bld.instructions + pc;
> -      const struct tgsi_opcode_info *opcode_info =
> -         tgsi_get_opcode_info(instr->Instruction.Opcode);
> -      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
> -         _debug_printf("warning: failed to translate tgsi opcode %s
> to LLVM\n",
> -                       opcode_info->mnemonic);
> -   }
> -
> -   /* If we have indirect addressing in outputs we need to copy our
> alloca array
> -    * to the outputs slots specified by the called */
> -   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> -      unsigned index, chan;
> -      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] +
> 1);
> -      for (index = 0; index < info->num_outputs; ++index) {
> -         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
> -            bld.outputs[index][chan] = get_output_ptr(&bld, index,
> chan);
> -         }
> -      }
> -   }
> +   lp_build_tgsi_llvm(&bld.bld_base, tokens);
>  
>     if (0) {
>        LLVMBasicBlockRef block =
>        LLVMGetInsertBlock(gallivm->builder);
> @@ -2504,7 +1820,6 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
>        lp_debug_dump_value(function);
>        debug_printf("2222222222222222222222222222 \n");
>     }
> -   tgsi_parse_free( &parse );
>  
>     if (0) {
>        LLVMModuleRef module = LLVMGetGlobalParent(
> @@ -2512,8 +1827,6 @@ lp_build_tgsi_soa(struct gallivm_state
> *gallivm,
>        LLVMDumpModule(module);
>  
>     }
> -
> -   FREE( bld.instructions );
>  }
>  
>  
> --
> 1.7.3.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


More information about the mesa-dev mailing list