[Mesa-dev] [PATCH 03/11] st/mesa: implement GL_ATI_fragment_shader

Wed Dec 16 14:29:56 PST 2015

Thank you for your review. See my replies inline.

On 12/16/2015 05:26 PM, Marek Olšák wrote:
>   On Wed, Dec 16, 2015 at 12:05 AM, Miklós Máté <mtmkls at gmail.com> wrote:
>> ---
>>   src/mesa/Makefile.sources                 |   1 +
>>   src/mesa/state_tracker/st_atifs_to_tgsi.c | 798 ++++++++++++++++++++++++++++++
>>   src/mesa/state_tracker/st_atifs_to_tgsi.h |  49 ++
>>   src/mesa/state_tracker/st_atom_constbuf.c |  14 +
>>   src/mesa/state_tracker/st_cb_drawpixels.c |   1 +
>>   src/mesa/state_tracker/st_cb_program.c    |  35 +-
>>   src/mesa/state_tracker/st_program.c       |  22 +
>>   src/mesa/state_tracker/st_program.h       |   1 +
>>   8 files changed, 920 insertions(+), 1 deletion(-)
>>   create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.c
>>   create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.h
>>
>> diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
>> index ed9848c..a8e645d 100644
>> --- a/src/mesa/Makefile.sources
>> +++ b/src/mesa/Makefile.sources
>> @@ -390,6 +390,7 @@ VBO_FILES = \
>>          vbo/vbo_split_inplace.c
>>
>>   STATETRACKER_FILES = \
>> +       state_tracker/st_atifs_to_tgsi.c \
>>          state_tracker/st_atom_array.c \
>>          state_tracker/st_atom_blend.c \
>>          state_tracker/st_atom.c \
>> diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c
>> new file mode 100644
>> index 0000000..1d704cb
>> --- /dev/null
>> +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c
>> @@ -0,0 +1,798 @@
>> +
>> +#include "main/mtypes.h"
>> +#include "main/atifragshader.h"
>> +#include "main/texobj.h"
>> +#include "main/errors.h"
>> +#include "program/prog_parameter.h"
>> +
>> +#include "tgsi/tgsi_ureg.h"
>> +#include "util/u_math.h"
>> +#include "util/u_memory.h"
>> +
>> +#include "st_program.h"
>> +#include "st_atifs_to_tgsi.h"
>> +
>> +/**
>> + * Intermediate state used during shader translation.
>> + */
>> +struct st_translate {
>> +   struct ureg_program *ureg;
>> +   struct gl_context *ctx;
>> +   struct ati_fragment_shader *atifs;
>> +
>> +   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
>> +   struct ureg_src *constants;
>> +   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
>> +   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
>> +   struct ureg_dst address[1];
>> +   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
>> +   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
>> +
>> +   const GLuint *inputMapping;
>> +   const GLuint *outputMapping;
>> +
>> +   /* Keep a record of the tgsi instruction number that each mesa
>> +    * instruction starts at, will be used to fix up labels after
>> +    * translation.
>> +    */
>> +   unsigned *insn;
>> +   unsigned insn_size;
>> +   unsigned insn_count;
>> +
>> +   unsigned current_pass;
>> +
>> +   bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI];
>> +
>> +   boolean error;
>> +};
>> +
>> +struct instruction_desc {
>> +   unsigned TGSI_opcode;
>> +   const char *name;
>> +   unsigned char arg_count;
>> +   unsigned char special; /* no 1:1 corresponding TGSI instruction */
>> +};
>> +
>> +/* index this array as inst_desc[ATI_opcode-GL_MOV_ATI] */
>> +static struct instruction_desc inst_desc[] = {
>> +   {TGSI_OPCODE_MOV, "MOV", 1, 0},
>> +   {TGSI_OPCODE_NOP, "UND", 0, 0}, /* unused */
>> +   {TGSI_OPCODE_ADD, "ADD", 2, 0},
>> +   {TGSI_OPCODE_MUL, "MUL", 2, 0},
>> +   {TGSI_OPCODE_SUB, "SUB", 2, 0},
>> +   {TGSI_OPCODE_DP3, "DOT3", 2, 0},
>> +   {TGSI_OPCODE_DP4, "DOT4", 2, 0},
>> +   {TGSI_OPCODE_MAD, "MAD", 3, 0},
>> +   {TGSI_OPCODE_LRP, "LERP", 3, 0},
>> +   {TGSI_OPCODE_NOP, "CND", 3, 1},
>> +   {TGSI_OPCODE_NOP, "CND0", 3, 2},
>> +   {TGSI_OPCODE_NOP, "DOT2_ADD", 3, 3}
>> +};
>> +
>> +/**
>> + * Called prior to emitting the TGSI code for each Mesa instruction.
>> + * Allocate additional space for instructions if needed.
>> + * Update the insn[] array so the next Mesa instruction points to
>> + * the next TGSI instruction.
>> + * Copied from st_mesa_to_tgsi.c
>> + */
>> +static void set_insn_start(struct st_translate *t,
>> +      unsigned start)
>> +{
>> +   if (t->insn_count + 1 >= t->insn_size) {
>> +      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
>> +      t->insn = realloc(t->insn, t->insn_size * sizeof t->insn[0]);
>> +      if (t->insn == NULL) {
>> +         t->error = TRUE;
>> +         return;
>> +      }
>> +   }
>> +
>> +   t->insn[t->insn_count++] = start;
>> +}
>> +
>> +static void emit_insn(struct st_translate *t,
>> +          unsigned opcode,
>> +          const struct ureg_dst *dst,
>> +          unsigned nr_dst,
>> +          const struct ureg_src *src,
>> +          unsigned nr_src)
>> +{
>> +   set_insn_start(t, ureg_get_instruction_number(t->ureg));
>> +   ureg_insn(t->ureg, opcode, dst, nr_dst, src, nr_src);
>> +}
>> +
>> +static struct ureg_dst get_temp(struct st_translate *t, unsigned index)
>> +{
>> +   if (ureg_dst_is_undef(t->temps[index]))
>> +      t->temps[index] = ureg_DECL_temporary(t->ureg);
>> +   return t->temps[index];
>> +}
>> +
>> +static struct ureg_src apply_swizzle(struct st_translate *t,
>> +      struct ureg_src src, GLuint swizzle)
>> +{
>> +   if (swizzle == GL_SWIZZLE_STR_ATI) {
>> +      return src;
>> +   } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
>> +      return ureg_swizzle(src,
>> +            TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z);
>> +   } else {
>> +      struct ureg_dst tmp[2];
>> +      struct ureg_src imm[3];
>> +
>> +      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
>> +      tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+1);
>> +      imm[0] = src;
>> +      imm[1] = ureg_imm4f(t->ureg, 1.0, 1.0, 0.0, 0.0);
>> +      imm[2] = ureg_imm4f(t->ureg, 0.0, 0.0, 1.0, 1.0);
>> +      emit_insn(t, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3);
>> +
>> +      if (swizzle == GL_SWIZZLE_STR_DR_ATI)
>> +         imm[0] = ureg_swizzle(src,
>> +               TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
>> +      else
>> +         imm[0] = ureg_swizzle(src,
>> +               TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
>> +      emit_insn(t, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1);
>> +
>> +      imm[0] = ureg_src(tmp[0]);
>> +      imm[1] = ureg_src(tmp[1]);
>> +      emit_insn(t, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2);
>> +
>> +      return ureg_src(tmp[0]);
>> +   }
>> +}
>> +
>> +/**
>> + * Map mesa texture target to TGSI texture target.
>> + * copied from st_mesa_to_tgsi.c
>> + */
>> +static unsigned translate_texture_target(GLuint textarget)
>> +{
>> +   /* the shadow part is omitted */
>> +   switch( textarget ) {
>> +      case TEXTURE_2D_MULTISAMPLE_INDEX: return TGSI_TEXTURE_2D_MSAA;
>> +      case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY_MSAA;
>> +      case TEXTURE_BUFFER_INDEX: return TGSI_TEXTURE_BUFFER;
>> +      case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
>> +      case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
>> +      case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;
>> +      case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
>> +      case TEXTURE_CUBE_ARRAY_INDEX: return TGSI_TEXTURE_CUBE_ARRAY;
>> +      case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
>> +      case TEXTURE_1D_ARRAY_INDEX:   return TGSI_TEXTURE_1D_ARRAY;
>> +      case TEXTURE_2D_ARRAY_INDEX:   return TGSI_TEXTURE_2D_ARRAY;
>> +      case TEXTURE_EXTERNAL_INDEX:   return TGSI_TEXTURE_2D;
>> +      default:
>> +         debug_assert( 0 );
>> +         return TGSI_TEXTURE_1D;
>> +   }
>> +}
>> +
>> +static
>> +gl_texture_index get_texture_target_index(struct gl_context *ctx,
>> +      const unsigned r)
>> +{
>> +   struct gl_texture_object *texObj = ctx->Texture.Unit[r]._Current;
>> +   if (texObj) {
>> +      //TODO TargetIndex should be used here, but that's not always set
>> +      return _mesa_tex_target_to_index(ctx, texObj->Target);
>> +   } else {
>> +      /* fallback for missing texture */
>> +      //_mesa_debug(0, "texture %u is missing\n", r);
>> +      return TEXTURE_2D_INDEX;
>> +   }
>> +}
>> +
>> +static struct ureg_src get_source(struct st_translate *t, GLuint src_type)
>> +{
>> +   //_mesa_debug(0, "source type %u\n", src_type);
>> +   if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
>> +      //TODO why are registers defined up to 31 in glext.h?
>> +      if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI])
>> +         return ureg_src(get_temp(t, src_type - GL_REG_0_ATI));
>> +      else
>> +         return ureg_imm1f(t->ureg, 0.0);
>> +   } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
>> +      //TODO why are constants defined up to 31 in glext.h?
>> +      return t->constants[src_type - GL_CON_0_ATI];
>> +   } else if (src_type == GL_ZERO) {
>> +      return ureg_imm1f(t->ureg, 0.0);
>> +   } else if (src_type == GL_ONE) {
>> +      return ureg_imm1f(t->ureg, 1.0);
>> +   } else if (src_type == GL_PRIMARY_COLOR_ARB) {
>> +      return t->inputs[t->inputMapping[VARYING_SLOT_COL0]];
>> +   } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
>> +      return t->inputs[t->inputMapping[VARYING_SLOT_COL1]];
>> +   } else {
>> +      assert(!"unknown source");
>> +      return ureg_imm1f(t->ureg, 0.0);
>> +   }
>> +}
>> +
>> +static struct ureg_src prepare_argument(struct st_translate *t, const unsigned argId,
>> +      const struct atifragshader_src_register *srcReg)
>> +{
>> +   struct ureg_src src = get_source(t, srcReg->Index);
>> +   struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+argId);
>> +
>> +   switch (srcReg->argRep) {
>> +      case GL_NONE:
>> +         break;
>> +      case GL_RED:
>> +         src = ureg_swizzle(src,
>> +               TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
>> +         break;
>> +      case GL_GREEN:
>> +         src = ureg_swizzle(src,
>> +               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y);
>> +         break;
>> +      case GL_BLUE:
>> +         src = ureg_swizzle(src,
>> +               TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
>> +         break;
>> +      case GL_ALPHA:
>> +         src = ureg_swizzle(src,
>> +               TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
>> +         break;
>> +   }
>> +   emit_insn(t, TGSI_OPCODE_MOV, &arg, 1, &src, 1);
>> +
>> +   if (srcReg->argMod & GL_COMP_BIT_ATI) {
>> +      struct ureg_src modsrc[2];
>> +      modsrc[0] = ureg_imm1f(t->ureg, 1.0);
>> +      modsrc[1] = ureg_src(arg);
>> +
>> +      emit_insn(t, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2);
>> +   }
>> +   if (srcReg->argMod & GL_BIAS_BIT_ATI) {
>> +      struct ureg_src modsrc[2];
>> +      modsrc[0] = ureg_src(arg);
>> +      modsrc[1] = ureg_imm1f(t->ureg, 0.5);
>> +
>> +      emit_insn(t, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2);
>> +   }
>> +   if (srcReg->argMod & GL_2X_BIT_ATI) {
>> +      struct ureg_src modsrc[2];
>> +      modsrc[0] = ureg_src(arg);
>> +      modsrc[1] = ureg_imm1f(t->ureg, 2.0);
>> +
>> +      emit_insn(t, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
>> +   }
>> +   if (srcReg->argMod & GL_NEGATE_BIT_ATI) {
>> +      struct ureg_src modsrc[2];
>> +      modsrc[0] = ureg_src(arg);
>> +      modsrc[1] = ureg_imm1f(t->ureg, -1.0);
>> +
>> +      emit_insn(t, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
>> +   }
>> +   return  ureg_src(arg);
>> +}
>> +
>> +/* These instructions have no direct equivalent in TGSI */
>> +static void emit_special_inst(struct st_translate *t, struct instruction_desc *desc,
>> +      struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
>> +{
>> +   struct ureg_dst tmp[1];
>> +   struct ureg_src src[3];
>> +
>> +   if        (desc->special == 1) {
>> +      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+2); // re-purpose a3
>> +      src[0] = ureg_imm1f(t->ureg, 0.5f);
>> +      src[1] = args[2];
>> +      emit_insn(t, TGSI_OPCODE_SLT, tmp, 1, src, 2);
>> +      src[0] = ureg_src(tmp[0]);
>> +      src[1] = args[0];
>> +      src[2] = args[1];
>> +      emit_insn(t, TGSI_OPCODE_LRP, dst, 1, src, 3);
>> +   } else if (desc->special == 2) {
>> +      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+2); // re-purpose a3
>> +      src[0] = args[2];
>> +      src[1] = ureg_imm1f(t->ureg, 0.0f);
>> +      emit_insn(t, TGSI_OPCODE_SGE, tmp, 1, src, 2);
>> +      src[0] = ureg_src(tmp[0]);
>> +      src[1] = args[0];
>> +      src[2] = args[1];
>> +      emit_insn(t, TGSI_OPCODE_LRP, dst, 1, src, 3);
>> +   } else if (desc->special == 3) {
>> +      src[0] = args[0];
>> +      src[1] = args[1];
>> +      src[2] = ureg_swizzle(args[2],
>> +            TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
>> +      emit_insn(t, TGSI_OPCODE_DP2A, dst, 1, src, 3);
>> +   }
>> +}
>> +
>> +static void emit_arith_inst(struct st_translate *t,
>> +      struct instruction_desc *desc,
>> +      struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
>> +{
>> +   if (desc->special) {
>> +      return emit_special_inst(t, desc, dst, args, argcount);
>> +   }
>> +
>> +   emit_insn(t, desc->TGSI_opcode, dst, 1, args, argcount);
>> +}
>> +
>> +static void emit_dstmod(struct st_translate *t,
>> +      struct ureg_dst dst, GLuint dstMod)
>> +{
>> +   float imm = 0.0;
>> +   struct ureg_src src[3];
>> +
>> +   if (dstMod == GL_NONE) {
>> +      return;
>> +   }
>> +
>> +   if        (dstMod & GL_2X_BIT_ATI) {
>> +      imm = 2.0f;
>> +   } else if (dstMod & GL_4X_BIT_ATI) {
>> +      imm = 4.0f;
>> +   } else if (dstMod & GL_8X_BIT_ATI) {
>> +      imm = 8.0f;
>> +   } else if (dstMod & GL_HALF_BIT_ATI) {
>> +      imm = 0.5f;
>> +   } else if (dstMod & GL_QUARTER_BIT_ATI) {
>> +      imm = 0.25f;
>> +   } else if (dstMod & GL_EIGHTH_BIT_ATI) {
>> +      imm = 0.125f;
>> +   }
>> +   if (imm) {
>> +      src[0] = ureg_src(dst);
>> +      src[1] = ureg_imm1f(t->ureg, imm);
>> +      if (dstMod & GL_SATURATE_BIT_ATI) {
>> +         dst = ureg_saturate(dst);
>> +      }
>> +      emit_insn(t, TGSI_OPCODE_MUL, &dst, 1, src, 2);
>> +   }
>> +}
>> +
>> +/**
>> + * Compile one setup instruction to TGSI instructions.
>> + */
>> +static void compile_setupinst(struct gl_context *ctx,
>> +      struct st_translate *t,
>> +      const unsigned r,
>> +      const struct atifs_setupinst *texinst)
>> +{
>> +   struct ureg_dst dst[1];
>> +   struct ureg_src src[2];
>> +
>> +   if (!texinst->Opcode)
>> +      return;
>> +
>> +   dst[0] = get_temp(t, r);
>> +
>> +   GLuint pass_tex = texinst->src;
>> +
>> +   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
>> +      unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;
>> +      src[0] = t->inputs[t->inputMapping[attr]];
>> +   } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
>> +      unsigned reg = pass_tex - GL_REG_0_ATI;
>> +      /* the frontend already validated that REG is only allowed in second pass */
>> +      if (t->regs_written[0][reg]) {
>> +         src[0] = ureg_src(t->temps[reg]);
>> +      } else {
>> +         src[0] = ureg_imm1f(t->ureg, 0.0);
>> +      }
>> +   }
>> +   src[0] = apply_swizzle(t, src[0], texinst->swizzle);
>> +
>> +   set_insn_start(t, ureg_get_instruction_number(t->ureg));
>> +   if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
>> +      /* use the current texture target for the sample operation
>> +       * note: this implementation doesn't support re-using an ATI_fs
>> +       *    with different texture targets
>> +       */
>> +      gl_texture_index index = get_texture_target_index(ctx, r);
>> +      unsigned target = translate_texture_target(index);
>> +      //_mesa_debug(0, "texture %u index %u target %u\n", r, index, target);
>> +
>> +      // by default texture and sampler indexes are the same
>> +      //_mesa_debug(0, "using sampler %u\n", r);
>> +      src[1] = t->samplers[r];
>> +      ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, target,
>> +            NULL, 0, src, 2);
>> +   } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
>> +      ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
>> +   }
>> +
>> +   t->regs_written[t->current_pass][r] = true;
>> +}
>> +
>> +/**
>> + * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
>> + */
>> +static void compile_instruction(struct st_translate *t,
>> +      const struct atifs_instruction *inst)
>> +{
>> +   unsigned optype;
>> +
>> +   for (optype=0; optype<2; optype++) { // color, alpha
>> +      struct instruction_desc *desc;
>> +      struct ureg_dst dst[1];
>> +      struct ureg_src args[3]; // arguments for the main operation
>> +      unsigned arg;
>> +      unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
>> +
>> +      if (!inst->Opcode[optype])
>> +         continue;
>> +
>> +      desc = &inst_desc[inst->Opcode[optype]-GL_MOV_ATI];
>> +
>> +      /* prepare the arguments */
>> +      for (arg=0; arg<desc->arg_count; arg++) {
>> +         if (arg >= inst->ArgCount[optype]) {
>> +            _mesa_debug(0, "warning: using 0 for missing argument %d of %s\n",
>> +                  arg, desc->name);
>> +            args[arg] = ureg_imm1f(t->ureg, 0.0);
>> +         } else {
>> +            args[arg] = prepare_argument(t, arg,
>> +                  &inst->SrcReg[optype][arg]);
>> +         }
>> +      }
>> +
>> +      /* prepare dst */
>> +      dst[0] = get_temp(t, dstreg);
>> +
>> +      if (optype)
>> +         dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W);
>> +      else {
>> +         GLuint dstMask = inst->DstReg[optype].dstMask;
>> +         if (dstMask == GL_NONE)
>> +            dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ);
>> +         else
>> +            dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */
>> +      }
>> +
>> +      /* emit the main instruction */
>> +      emit_arith_inst(t, desc, dst, args, arg);
>> +
>> +      emit_dstmod(t, *dst, inst->DstReg[optype].dstMod);
>> +
>> +      t->regs_written[t->current_pass][dstreg] = true;
>> +   }
>> +}
>> +
>> +/*static void add_tint(struct st_translate *t, float r, float g, float b)
>> +{
>> +   struct ureg_dst dst[1];
>> +   struct ureg_src src[2];
>> +   dst[0] = t->temps[0];
>> +   src[0] = ureg_src(t->temps[0]);
>> +   src[1] = ureg_imm4f(t->ureg, r, g, b, 1.0);
>> +   emit_insn(t, TGSI_OPCODE_MUL, dst, 1, src, 2);
>> +}*/
>> +
>> +static void apply_fog(struct st_translate *t)
>> +{
>> +   struct gl_fog_attrib *fog = &t->ctx->Fog;
>> +   struct ureg_src oparams = t->constants[8];
>> +   struct ureg_src fogcolor = t->constants[9];
>> +   /* this is a single float in the X coordinate */
>> +   struct ureg_src fogcoord = t->inputs[t->inputMapping[VARYING_SLOT_FOGC]];
>> +
>> +   struct ureg_dst dst[1];
>> +   struct ureg_src src[3];
>> +
>> +   /* Detect ATIfs 8 of KotOR, and render black in this pass
>> +    * to get correct fog on the ground in Kashyyyk Shadowlands */
>> +   if (t->atifs->NumPasses == 1) {
>> +      struct atifs_instruction *firstinst = &t->atifs->Instructions[0][0];
>> +      if (firstinst->Opcode[0] == GL_MOV_ATI
>> +            && firstinst->SrcReg[0][0].Index == GL_PRIMARY_COLOR_ARB
>> +            && firstinst->DstReg[0].Index == GL_REG_0_ATI) {
>> +         _mesa_debug(0, "Skipping this pass\n");
>> +         dst[0] = t->temps[0];
>> +         src[0] = ureg_imm1f(t->ureg, 0.0f);
>> +         emit_insn(t, TGSI_OPCODE_MOV, dst, 1, src, 1);
>> +         return;
>> +      }
>> +   }
>> +
>> +   _mesa_debug(0, "Adding fog\n");
>> +
>> +   /* compute the 1 component fog factor f */
>> +   if (fog->Mode == GL_LINEAR) {
>> +      /* formula: f = (end - z) / (end - start)
>> +       * with optimized parameters: f = MAD(fogcoord, oparams.x, oparams.y)
>> +       */
>> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
>> +      src[0] = fogcoord;
>> +      src[1] = ureg_swizzle(oparams,
>> +            TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
>> +      src[2] = ureg_swizzle(oparams,
>> +            TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y);
>> +
>> +      emit_insn(t, TGSI_OPCODE_MAD, dst, 1, src, 3);
>> +   } else if (fog->Mode == GL_EXP) {
>> +      /* formula: f = exp(-dens * z)
>> +       * with optimized parameters:
>> +       *    f = MUL(fogcoord, oparams.z); f= EX2(-f)
>> +       */
>> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
>> +      src[0] = fogcoord;
>> +      src[1] = ureg_swizzle(oparams,
>> +            TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
>> +      emit_insn(t, TGSI_OPCODE_MUL, dst, 1, src, 2);
>> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
>> +      src[0] = ureg_negate(ureg_src(dst[0]));
>> +      emit_insn(t, TGSI_OPCODE_EX2, dst, 1, src, 1);
>> +   } else if (fog->Mode == GL_EXP2) {
>> +      /* formula: f = exp(-(dens * z)^2)
>> +       * with optimized parameters:
>> +       *    f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
>> +       */
>> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
>> +      src[0] = fogcoord;
>> +      src[1] = ureg_swizzle(oparams,
>> +            TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
>> +      emit_insn(t, TGSI_OPCODE_MUL, dst, 1, src, 2);
>> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
>> +      src[0] = ureg_src(dst[0]);
>> +      emit_insn(t, TGSI_OPCODE_MUL, dst, 1, src, 2);
>> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
>> +      src[0] = ureg_negate(ureg_src(dst[0]));
>> +      emit_insn(t, TGSI_OPCODE_EX2, dst, 1, src, 1);
>> +   }
>> +
>> +   //TODO do we need f = CLAMP(f)? ff_fs does this, programopt doesn't
>> +   dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
>> +   src[0] = ureg_src(dst[0]);
>> +   src[1] = ureg_imm1f(t->ureg, 0.0f);
>> +   src[2] = ureg_imm1f(t->ureg, 1.0f);
>> +   emit_insn(t, TGSI_OPCODE_CLAMP, dst, 1, src, 3);
>> +
>> +   /* REG0=LRP(f.xxxy, REG0, fogcolor) */
>> +   //TODO ff_fs uses a series of arithmetic instead of lerp
>> +   dst[0] = t->temps[0];
>> +   src[0] = ureg_swizzle(ureg_src(get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI)),
>> +         TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y);
>> +   src[1] = ureg_src(t->temps[0]);
>> +   src[2] = fogcolor;
>> +   emit_insn(t, TGSI_OPCODE_LRP, dst, 1, src, 3);
>> +}
>> +
>> +static void finalize_shader(struct st_translate *t, unsigned numPasses)
>> +{
>> +   struct ureg_dst dst[1] = { { 0 } };
>> +   struct ureg_src src[1] = { { 0 } };
>> +   if (t->regs_written[numPasses-1][0]) {
>> +      if (t->ctx->Fog.Enabled)
>> +         apply_fog(t);
>> +
>> +      /*add_tint(t, 1, 0, 1);*/
>> +
>> +      /* copy the result into the OUT slot */
>> +      dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]];
>> +      src[0] = ureg_src(t->temps[0]);
>> +      emit_insn(t, TGSI_OPCODE_MOV, dst, 1, src, 1);
>> +   }
>> +
>> +   /* signal the end of the program */
>> +   emit_insn(t, TGSI_OPCODE_END, dst, 0, src, 0);
>> +}
>> +
>> +/**
>> + * Called when a new variant is needed, we need to translate the ATI fragment shader
>> + * to TGSI
>> + */
>> +enum pipe_error
>> +st_translate_atifs_program(
>> +      struct gl_context *ctx,
>> +      uint procType,
>> +      struct ureg_program *ureg,
>> +      struct ati_fragment_shader *atifs,
>> +      struct gl_program *program,
>> +      GLuint numInputs,
>> +      const GLuint inputMapping[],
>> +      const ubyte inputSemanticName[],
>> +      const ubyte inputSemanticIndex[],
>> +      const GLuint interpMode[],
>> +      GLuint numOutputs,
>> +      const GLuint outputMapping[],
>> +      const ubyte outputSemanticName[],
>> +      const ubyte outputSemanticIndex[],
>> +      boolean passthrough_edgeflags,
>> +      boolean clamp_color)
> The last 2 parameters can be removed. Those two are now applied on the
> TGSI representation later.
Fixed.
>
>> +{
>> +   enum pipe_error ret = PIPE_OK;
>> +
>> +   unsigned pass, i, r;
>> +
>> +   struct st_translate translate, *t;
>> +   t = &translate;
>> +   memset(t, 0, sizeof *t);
>> +
>> +   t->inputMapping = inputMapping;
>> +   t->outputMapping = outputMapping;
>> +   t->ureg = ureg;
>> +   t->ctx = ctx;
>> +   t->atifs = atifs;
>> +
>> +   _mesa_debug(0, "Compiling ATI fragment shader %u\n", atifs->Id);
>> +
>> +   /*
>> +    * Declare input attributes.
>> +    */
>> +   for (i = 0; i < numInputs; i++) {
>> +      t->inputs[i] = ureg_DECL_fs_input(ureg,
>> +            inputSemanticName[i],
>> +            inputSemanticIndex[i],
>> +            interpMode[i]);
>> +   }
>> +
>> +   /*
>> +    * Declare output attributes:
>> +    *  we always have numOutputs=1 and it's FRAG_RESULT_COLOR
>> +    */
>> +   // TODO assert(numOutputs==1 && outputSemanticName[0]==TGSI_SEMANTIC_COLOR && outputSemanticIndex[0]==0); ?
>> +   t->outputs[0] = ureg_DECL_output( ureg,
>> +         TGSI_SEMANTIC_COLOR,
>> +         outputSemanticIndex[0] );
>> +
>> +   /* Emit constants and immediates.  Mesa uses a single index space
>> +    * for these, so we put all the translated regs in t->constants.
>> +    */
>> +   if (program->Parameters) {
>> +      t->constants = calloc( program->Parameters->NumParameters,
>> +            sizeof t->constants[0] );
>> +      if (t->constants == NULL) {
>> +         ret = PIPE_ERROR_OUT_OF_MEMORY;
>> +         goto out;
>> +      }
>> +
>> +      for (i = 0; i < program->Parameters->NumParameters; i++) {
>> +         switch (program->Parameters->Parameters[i].Type) {
>> +            case PROGRAM_STATE_VAR:
>> +            case PROGRAM_UNIFORM:
>> +               t->constants[i] = ureg_DECL_constant( ureg, i );
>> +               break;
>> +
>> +            case PROGRAM_CONSTANT:
>> +               t->constants[i] =
>> +                  ureg_DECL_immediate( ureg,
>> +                        (const float*)program->Parameters->ParameterValues[i],
>> +                        4 );
>> +               break;
>> +            default:
>> +               break;
>> +         }
>> +      }
>> +   }
>> +
>> +   /* texture samplers */
>> +   for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
>> +      if (program->SamplersUsed & (1 << i)) {
>> +         //TODO here, if texture is undef, we need to cancel the reference
>> +         //       and later convert the sample instruction into MOV(reg, 0) (?)
>> +         t->samplers[i] = ureg_DECL_sampler( ureg, i );
>> +
>> +         /* fix texture targets that are not 2D */
>> +         /* note: this implementation doesn't support re-using an ATI_fs
>> +          *    with different texture targets
>> +          */
>> +         gl_texture_index index = get_texture_target_index(ctx, i);
>> +         if (index != TEXTURE_2D_INDEX) {
>> +            _mesa_debug(0, "Fixing tex target %u to %u\n", i, index);
>> +            program->TexturesUsed[i] &= ~TEXTURE_2D_BIT;
>> +            program->TexturesUsed[i] |= (1 << index);
>> +         } else {
>> +            _mesa_debug(0, "Tex target %u stays TEXTURE_2D\n", i);
>> +         }
>> +      }
>> +   }
>> +
>> +   /* emit instructions */
>> +   for (pass=0; pass<atifs->NumPasses; pass++) {
>> +      t->current_pass = pass;
>> +      for (r=0; r<MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
>> +         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
>> +         compile_setupinst(ctx, t, r, texinst);
>> +      }
>> +      for (i=0; i<atifs->numArithInstr[pass]; i++) {
>> +         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
>> +         compile_instruction(t, inst);
>> +      }
>> +   }
>> +
>> +   finalize_shader(t, atifs->NumPasses);
>> +
>> +out:
>> +   free(t->insn);
>> +   free(t->constants);
>> +
>> +   if (t->error) {
>> +      debug_printf("%s: translate error flag set\n", __func__);
>> +   }
>> +   _mesa_debug(0, "ATI fragment shader is compiled\n");
>> +
>> +   return ret;
>> +}
>> +
>> +/**
>> + * Called in ProgramStringNotify, we need to fill the metadata of the
>> + * gl_program attached to the ati_fragment_shader
>> + */
>> +void
>> +st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog)
>> +{
>> +   /* we know this is st_fragment_program, because of st_new_ati_fs() */
>> +   struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
>> +   struct ati_fragment_shader *atifs = stfp->ati_fs;
>> +
>> +   unsigned pass, i, r, optype, arg;
>> +
>> +   static const gl_state_index fog_params_state[STATE_LENGTH] =
>> +      {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0};
>> +   static const gl_state_index fog_color[STATE_LENGTH] =
>> +   {STATE_FOG_COLOR, 0, 0, 0, 0};
>> +
>> +   _mesa_debug(0, "Initializing the gl_program of ATI fs\n");
>> +
>> +   prog->InputsRead = 0;
>> +   prog->OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR);
>> +   prog->SamplersUsed = 0;
>> +   prog->Parameters = _mesa_new_parameter_list();
>> +
>> +   /* fill in InputsRead, SamplersUsed, TexturesUsed */
>> +   for (pass=0; pass<atifs->NumPasses; pass++) {
>> +      for (r=0; r<MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
>> +         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
>> +         GLuint pass_tex = texinst->src;
>> +
>> +         if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
>> +            /* mark which texcoords are used */
>> +            prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
>> +            /* by default there is 1:1 mapping between samplers and textures */
>> +            prog->SamplersUsed |= (1 << r);
>> +            /* the target is unknown here in glEndFragmentShaderATI(),
>> +             * it will be fixed this during compiling, which is in the draw call
>> +             */
>> +            prog->TexturesUsed[r] |= TEXTURE_2D_BIT;
>> +         } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
>> +            if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
>> +               prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
>> +            }
>> +         }
>> +      }
>> +   }
>> +   for (pass=0; pass<atifs->NumPasses; pass++) {
>> +      for (i=0; i<atifs->numArithInstr[pass]; i++) {
>> +         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
> Please add space between declarations and code.
Done.
>
>> +         for (optype=0; optype<2; optype++) { // color, alpha
>> +            if (inst->Opcode[optype]) {
>> +               for (arg=0; arg<inst->ArgCount[optype]; arg++) {
>> +                  GLint index = inst->SrcReg[optype][arg].Index;
>> +                  if (index == GL_PRIMARY_COLOR_EXT) {
>> +                     prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL0);
>> +                  } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
>> +                     /* note: ATI_fragment_shader.txt never specifies what
>> +                      * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
>> +                      * VARYING_SLOT_COL1 for this input */
>> +                     prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL1);
>> +                  }
>> +               }
>> +            }
>> +         }
>> +      }
>> +   }
>> +   /* We may need fog */
>> +   prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_FOGC);
>> +
>> +   /* we always have 8 ATI_fs constants, and the fog params */
>> +   for (i=0; i<8; i++) {
>> +      //TODO check the return value of this
>> +      _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM,
>> +            NULL, 4, GL_FLOAT, NULL, NULL);
>> +   }
>> +   //TODO check the return value of these, too
>> +   _mesa_add_state_reference(prog->Parameters, fog_params_state);
>> +   _mesa_add_state_reference(prog->Parameters, fog_color);
>> +
>> +   prog->NumInstructions = 0;
>> +   prog->NumTemporaries = 6+3; // 6 registers, 3 input temps for arith ops
>> +   prog->NumParameters = 8+2; // the number of constants + state variables
>> +}
>> diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.h b/src/mesa/state_tracker/st_atifs_to_tgsi.h
>> new file mode 100644
>> index 0000000..f7e04e9
>> --- /dev/null
>> +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.h
>> @@ -0,0 +1,49 @@
>> +//TODO copyright header
>> +
>> +
>> +#ifndef ST_ATIFS_TO_TGSI_H
>> +#define ST_ATIFS_TO_TGSI_H
>> +
>> +#if defined __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +#include "main/glheader.h"
>> +
>> +#include "pipe/p_compiler.h"
>> +#include "pipe/p_defines.h"
>> +
>> +struct gl_context;
>> +struct gl_program;
>> +struct tgsi_token;
>> +struct ureg_program;
>> +
>> +enum pipe_error
>> +st_translate_atifs_program(
>> +    struct gl_context *ctx,
>> +    uint procType,
>> +    struct ureg_program *ureg,
>> +    struct ati_fragment_shader *atifs,
>> +    struct gl_program *program,
>> +    GLuint numInputs,
>> +    const GLuint inputMapping[],
>> +    const ubyte inputSemanticName[],
>> +    const ubyte inputSemanticIndex[],
>> +    const GLuint interpMode[],
>> +    GLuint numOutputs,
>> +    const GLuint outputMapping[],
>> +    const ubyte outputSemanticName[],
>> +    const ubyte outputSemanticIndex[],
>> +    boolean passthrough_edgeflags,
>> +    boolean clamp_color);
>> +
>> +
>> +void
>> +st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog);
>> +
>> +
>> +#if defined __cplusplus
>> +} /* extern "C" */
>> +#endif
>> +
>> +#endif /* ST_ATIFS_TO_TGSI_H */
>> diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
>> index 6affb4d..3bd090f 100644
>> --- a/src/mesa/state_tracker/st_atom_constbuf.c
>> +++ b/src/mesa/state_tracker/st_atom_constbuf.c
>> @@ -63,6 +63,20 @@ void st_upload_constants( struct st_context *st,
>>             shader_type == PIPE_SHADER_TESS_CTRL ||
>>             shader_type == PIPE_SHADER_TESS_EVAL);
>>
>> +   /* update the ATI constants before rendering */
>> +   struct ati_fragment_shader *ati_fs = st->fp->ati_fs;
>> +   if (ati_fs) {
>> +      unsigned c;
>> +      for (c=0; c<8; c++) {
>> +         if (ati_fs->LocalConstDef & (1 << c))
>> +            memcpy(params->ParameterValues[c],
>> +                  ati_fs->Constants[c], sizeof(GLfloat) * 4);
>> +         else
>> +            memcpy(params->ParameterValues[c],
>> +                  st->ctx->ATIFragmentShader.GlobalConstants[c], sizeof(GLfloat) * 4);
>> +      }
>> +   }
>> +
>>      /* update constants */
>>      if (params && params->NumParameters) {
>>         struct pipe_constant_buffer cb;
>> diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
>> index b372697..bf764e5 100644
>> --- a/src/mesa/state_tracker/st_cb_drawpixels.c
>> +++ b/src/mesa/state_tracker/st_cb_drawpixels.c
>> @@ -1343,6 +1343,7 @@ blit_copy_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
>>          !ctx->FragmentProgram.Enabled &&
>>          !ctx->VertexProgram.Enabled &&
>>          !ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] &&
>> +       !ctx->ATIFragmentShader._Enabled && //TODO is this needed?
> Most likely yes.
Ok, I removed the comment.
>
>>          ctx->DrawBuffer->_NumColorDrawBuffers == 1 &&
>>          !ctx->Query.CondRenderQuery &&
>>          !ctx->Query.CurrentOcclusionObject) {
>> diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
>> index 3029909..dfe2542 100644
>> --- a/src/mesa/state_tracker/st_cb_program.c
>> +++ b/src/mesa/state_tracker/st_cb_program.c
>> @@ -35,6 +35,7 @@
>>   #include "main/enums.h"
>>   #include "main/shaderapi.h"
>>   #include "program/prog_instruction.h"
>> +#include "program/prog_parameter.h"
>>   #include "program/program.h"
>>
>>   #include "cso_cache/cso_context.h"
>> @@ -46,6 +47,7 @@
>>   #include "st_mesa_to_tgsi.h"
>>   #include "st_cb_program.h"
>>   #include "st_glsl_to_tgsi.h"
>> +#include "st_atifs_to_tgsi.h"
>>
>>
>>
>> @@ -272,6 +274,21 @@ st_program_string_notify( struct gl_context *ctx,
>>         if (st->tep == sttep)
>>            st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
>>      }
>> +   else if (target == GL_FRAGMENT_SHADER_ATI) {
>> +      assert(prog);
>> +
>> +      struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
>> +      assert(stfp->ati_fs);
>> +      assert(stfp->ati_fs->Program == prog);
>> +
>> +      st_release_fp_variants(st, stfp);
>> +
>> +      if (st->fp == stfp)
>> +         st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
>> +
>> +      /* now that we have the whole shader, we can set up the metadata in @prog */
>> +      st_init_atifs_prog(ctx, prog);
>> +   }
>>
>>      if (ST_DEBUG & DEBUG_PRECOMPILE)
>>         st_precompile_shader_variant(st, prog);
>> @@ -280,6 +297,21 @@ st_program_string_notify( struct gl_context *ctx,
>>      return GL_TRUE;
>>   }
>>
>> +/**
>> + * Called via ctx->Driver.NewATIfs()
>> + * Called when a new ATI fragment shader is created with gl_bindFragmentShaderATI()
>> + */
>> +static struct gl_program *
>> +st_new_ati_fs( struct gl_context *ctx,
>> +      GLuint id)
>> +{
>> +   //TODO is this id safe to use?
>> +   struct gl_program *prog = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, id);
>> +   struct st_fragment_program *stfp = (struct st_fragment_program *)prog;
>> +   stfp->ati_fs = ctx->ATIFragmentShader.Current;
> Please, no dependencies on the current state here.
> ctx->ATIFragmentShader.Current can be passed as a parameter to this
> function.
Ok, I changed it. I assume that the answer to the TODO question is yes, 
so I removed the comment.
>
>> +   return prog;
>> +}
>> +
>>
>>   /**
>>    * Plug in the program and shader-related device driver functions.
>> @@ -293,6 +325,7 @@ st_init_program_functions(struct dd_function_table *functions)
>>      functions->DeleteProgram = st_delete_program;
>>      functions->IsProgramNative = st_is_program_native;
>>      functions->ProgramStringNotify = st_program_string_notify;
>> -
>> +   functions->NewATIfs = st_new_ati_fs;
>> +
>>      functions->LinkShader = st_link_shader;
>>   }
>> diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
>> index e62dd7a..2e79932 100644
>> --- a/src/mesa/state_tracker/st_program.c
>> +++ b/src/mesa/state_tracker/st_program.c
>> @@ -51,6 +51,7 @@
>>   #include "st_context.h"
>>   #include "st_program.h"
>>   #include "st_mesa_to_tgsi.h"
>> +#include "st_atifs_to_tgsi.h"
>>   #include "cso_cache/cso_context.h"
>>
>>
>> @@ -571,6 +572,9 @@ st_translate_fragment_program(struct st_context *st,
>>      assert(!(key->bitmap && key->drawpixels));
>>      memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
>>
>> +   //TODO what about ATI_fs and glBitmap(), glDrawPixels()?
>> +   //    the spec says nothing about those
> They should supported, of course. Luckily for you, you don't have to
> do anything. The glBitmap and glDrawPixels transformations are applied
> on TGSI.
That's a relief. I removed the comment.
>
>> +
>>      if (key->bitmap) {
>>         /* glBitmap drawing */
>>         struct gl_fragment_program *fp; /* we free this temp program below */
>> @@ -860,6 +864,24 @@ st_translate_fragment_program(struct st_context *st,
>>                              fs_output_semantic_name,
>>                              fs_output_semantic_index, FALSE,
>>                              key->clamp_color );
> You need to rebase this code. This is quite old.
It wasn't old, when I started working on this :) I'll rebase it for the 
next round.

MM