[Mesa-dev] [PATCH 3/8] st/mesa: implement GL_ATI_fragment_shader

Miklós Máté mtmkls at gmail.com
Thu Mar 24 00:12:57 UTC 2016


v2: fix arithmetic for special opcodes,
 fix fog state, cleanup
v3: simplify handling of special opcodes,
 fix rebinding with different textargets or fog equation,
 lots of formatting fixes
v4: adapt to the "compile early, fix later" architecture,
 formatting fixes

Signed-off-by: Miklós Máté <mtmkls at gmail.com>
---
 src/mesa/Makefile.sources                 |   1 +
 src/mesa/main/atifragshader.h             |   1 +
 src/mesa/state_tracker/st_atifs_to_tgsi.c | 845 ++++++++++++++++++++++++++++++
 src/mesa/state_tracker/st_atifs_to_tgsi.h |  67 +++
 src/mesa/state_tracker/st_atom_constbuf.c |  15 +
 src/mesa/state_tracker/st_atom_shader.c   |  65 ++-
 src/mesa/state_tracker/st_cb_drawpixels.c |   1 +
 src/mesa/state_tracker/st_cb_program.c    |  31 ++
 src/mesa/state_tracker/st_program.c       |  34 +-
 src/mesa/state_tracker/st_program.h       |   8 +
 10 files changed, 1064 insertions(+), 4 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.c
 create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index a6c12c6..54601a9 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -395,6 +395,7 @@ VBO_FILES = \
 	vbo/vbo_split_inplace.c
 
 STATETRACKER_FILES = \
+	state_tracker/st_atifs_to_tgsi.c \
 	state_tracker/st_atom_array.c \
 	state_tracker/st_atom_atomicbuf.c \
 	state_tracker/st_atom_blend.c \
diff --git a/src/mesa/main/atifragshader.h b/src/mesa/main/atifragshader.h
index 5901134..0e32795 100644
--- a/src/mesa/main/atifragshader.h
+++ b/src/mesa/main/atifragshader.h
@@ -16,6 +16,7 @@ struct gl_context;
 #define MAX_NUM_INSTRUCTIONS_PER_PASS_ATI 8
 #define MAX_NUM_PASSES_ATI                2
 #define MAX_NUM_FRAGMENT_REGISTERS_ATI    6
+#define MAX_NUM_FRAGMENT_CONSTANTS_ATI    8
 
 struct ati_fs_opcode_st
 {
diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c
new file mode 100644
index 0000000..66f442a
--- /dev/null
+++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c
@@ -0,0 +1,845 @@
+/*
+ * Copyright (C) 2016 Miklós Máté
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/mtypes.h"
+#include "main/atifragshader.h"
+#include "main/errors.h"
+#include "program/prog_parameter.h"
+
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_transform.h"
+
+#include "st_program.h"
+#include "st_atifs_to_tgsi.h"
+
+/**
+ * Intermediate state used during shader translation.
+ */
+struct st_translate {
+   struct ureg_program *ureg; /* builder that receives every emitted instruction */
+   struct ati_fragment_shader *atifs; /* the shader being translated */
+
+   struct ureg_dst temps[MAX_PROGRAM_TEMPS]; /* declared lazily by get_temp() */
+   struct ureg_src *constants; /* one entry per program parameter, heap allocated */
+   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+
+   const GLuint *inputMapping; /* VARYING_SLOT_* -> index into inputs[] */
+   const GLuint *outputMapping; /* FRAG_RESULT_* -> index into outputs[] */
+
+   unsigned current_pass; /* pass whose instructions are being compiled */
+
+   bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI]; /* per pass */
+
+   boolean error; /* reported after translation; not set anywhere in the visible code */
+};
+
+struct instruction_desc {
+   unsigned TGSI_opcode;
+   const char *name;
+   unsigned char arg_count;
+};
+
+static const struct instruction_desc inst_desc[] = {
+   {TGSI_OPCODE_MOV, "MOV", 1},
+   {TGSI_OPCODE_NOP, "UND", 0}, /* unused */
+   {TGSI_OPCODE_ADD, "ADD", 2},
+   {TGSI_OPCODE_MUL, "MUL", 2},
+   {TGSI_OPCODE_SUB, "SUB", 2},
+   {TGSI_OPCODE_DP3, "DOT3", 2},
+   {TGSI_OPCODE_DP4, "DOT4", 2},
+   {TGSI_OPCODE_MAD, "MAD", 3},
+   {TGSI_OPCODE_LRP, "LERP", 3},
+   {TGSI_OPCODE_NOP, "CND", 3},
+   {TGSI_OPCODE_NOP, "CND0", 3},
+   {TGSI_OPCODE_NOP, "DOT2_ADD", 3}
+};
+
+static struct ureg_dst
+get_temp(struct st_translate *t, unsigned index)
+{
+   if (ureg_dst_is_undef(t->temps[index])) /* slot still unset: declare it on first use */
+      t->temps[index] = ureg_DECL_temporary(t->ureg);
+   return t->temps[index];
+}
+
+static struct ureg_src
+apply_swizzle(struct st_translate *t,
+              struct ureg_src src, GLuint swizzle)
+{
+   if (swizzle == GL_SWIZZLE_STR_ATI) {
+      return src;
+   } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
+      return ureg_swizzle(src,
+                          TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y,
+                          TGSI_SWIZZLE_W,
+                          TGSI_SWIZZLE_Z);
+   } else {
+      struct ureg_dst tmp[2];
+      struct ureg_src imm[3];
+
+      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
+      tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 1);
+      imm[0] = src;
+      imm[1] = ureg_imm4f(t->ureg, 1.0f, 1.0f, 0.0f, 0.0f);
+      imm[2] = ureg_imm4f(t->ureg, 0.0f, 0.0f, 1.0f, 1.0f);
+      ureg_insn(t->ureg, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3);
+
+      if (swizzle == GL_SWIZZLE_STR_DR_ATI) {
+         imm[0] = ureg_scalar(src, TGSI_SWIZZLE_Z);
+      } else {
+         imm[0] = ureg_scalar(src, TGSI_SWIZZLE_W);
+      }
+      ureg_insn(t->ureg, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1);
+
+      imm[0] = ureg_src(tmp[0]);
+      imm[1] = ureg_src(tmp[1]);
+      ureg_insn(t->ureg, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2);
+
+      return ureg_src(tmp[0]);
+   }
+}
+
+static struct ureg_src
+get_source(struct st_translate *t, GLuint src_type)
+{
+   if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
+      if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) {
+         return ureg_src(get_temp(t, src_type - GL_REG_0_ATI));
+      } else { /* register not written in the current pass: it reads as zero */
+         return ureg_imm1f(t->ureg, 0.0f);
+      }
+   } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
+      return t->constants[src_type - GL_CON_0_ATI]; /* indexed from the start of constants[] */
+   } else if (src_type == GL_ZERO) {
+      return ureg_imm1f(t->ureg, 0.0f);
+   } else if (src_type == GL_ONE) {
+      return ureg_imm1f(t->ureg, 1.0f);
+   } else if (src_type == GL_PRIMARY_COLOR_ARB) {
+      return t->inputs[t->inputMapping[VARYING_SLOT_COL0]];
+   } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
+      return t->inputs[t->inputMapping[VARYING_SLOT_COL1]];
+   } else {
+      /* frontend prevents this */
+      unreachable("unknown source");
+   }
+}
+
+static struct ureg_src
+prepare_argument(struct st_translate *t, const unsigned argId,
+                 const struct atifragshader_src_register *srcReg)
+{
+   struct ureg_src src = get_source(t, srcReg->Index);
+   struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId);
+
+   switch (srcReg->argRep) {
+   case GL_NONE:
+      break;
+   case GL_RED:
+      src = ureg_scalar(src, TGSI_SWIZZLE_X);
+      break;
+   case GL_GREEN:
+      src = ureg_scalar(src, TGSI_SWIZZLE_Y);
+      break;
+   case GL_BLUE:
+      src = ureg_scalar(src, TGSI_SWIZZLE_Z);
+      break;
+   case GL_ALPHA:
+      src = ureg_scalar(src, TGSI_SWIZZLE_W);
+      break;
+   }
+   ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1);
+
+   if (srcReg->argMod & GL_COMP_BIT_ATI) {
+      struct ureg_src modsrc[2];
+      modsrc[0] = ureg_imm1f(t->ureg, 1.0f);
+      modsrc[1] = ureg_src(arg);
+
+      ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2);
+   }
+   if (srcReg->argMod & GL_BIAS_BIT_ATI) {
+      struct ureg_src modsrc[2];
+      modsrc[0] = ureg_src(arg);
+      modsrc[1] = ureg_imm1f(t->ureg, 0.5f);
+
+      ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2);
+   }
+   if (srcReg->argMod & GL_2X_BIT_ATI) {
+      struct ureg_src modsrc[2];
+      modsrc[0] = ureg_src(arg);
+      modsrc[1] = ureg_src(arg);
+
+      ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
+   }
+   if (srcReg->argMod & GL_NEGATE_BIT_ATI) {
+      struct ureg_src modsrc[2];
+      modsrc[0] = ureg_src(arg);
+      modsrc[1] = ureg_imm1f(t->ureg, -1.0f);
+
+      ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
+   }
+   return  ureg_src(arg);
+}
+
+/* Expand CND, CND0 and DOT2_ADD (TGSI_OPCODE_NOP in inst_desc) into TGSI code */
+static void
+emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
+                  struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
+{
+   struct ureg_dst tmp[1];
+   struct ureg_src src[3];
+
+   if (!strcmp(desc->name, "CND")) {
+      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */
+      src[0] = ureg_imm1f(t->ureg, 0.5f);
+      src[1] = args[2];
+      ureg_insn(t->ureg, TGSI_OPCODE_SUB, tmp, 1, src, 2); /* tmp = 0.5 - a3 */
+      src[0] = ureg_src(tmp[0]);
+      src[1] = args[0];
+      src[2] = args[1];
+      ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); /* dst = CMP(tmp, a1, a2) */
+   } else if (!strcmp(desc->name, "CND0")) {
+      src[0] = args[2];
+      src[1] = args[1];
+      src[2] = args[0];
+      ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); /* dst = CMP(a3, a2, a1) */
+   } else if (!strcmp(desc->name, "DOT2_ADD")) {
+      /* note: DP2A is not implemented in most pipe drivers */
+      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
+      src[0] = args[0];
+      src[1] = args[1];
+      ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2); /* tmp = DP2(a1, a2) */
+      src[0] = ureg_src(tmp[0]);
+      src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z);
+      ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2); /* dst = tmp + a3.z */
+   } /* any other name (e.g. the unused "UND" entry) emits nothing */
+}
+
+/* Emit the main operation; TGSI_OPCODE_NOP marks opcodes that need expansion. */
+static void
+emit_arith_inst(struct st_translate *t,
+                const struct instruction_desc *desc,
+                struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
+{
+   if (desc->TGSI_opcode == TGSI_OPCODE_NOP)
+      emit_special_inst(t, desc, dst, args, argcount);
+   else
+      ureg_insn(t->ureg, desc->TGSI_opcode, dst, 1, args, argcount);
+}
+
+static void
+emit_dstmod(struct st_translate *t,
+            struct ureg_dst dst, GLuint dstMod)
+{
+   float imm;
+   struct ureg_src src[3];
+   GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI; /* scale bits without the saturate flag */
+
+   if (dstMod == GL_NONE) { /* no modifier: nothing to emit */
+      return;
+   }
+
+   switch (scale) {
+   case GL_2X_BIT_ATI:
+      imm = 2.0f;
+      break;
+   case GL_4X_BIT_ATI:
+      imm = 4.0f;
+      break;
+   case GL_8X_BIT_ATI:
+      imm = 8.0f;
+      break;
+   case GL_HALF_BIT_ATI:
+      imm = 0.5f;
+      break;
+   case GL_QUARTER_BIT_ATI:
+      imm = 0.25f;
+      break;
+   case GL_EIGHTH_BIT_ATI:
+      imm = 0.125f;
+      break;
+   default: /* reached when only the saturate bit is set: multiply by one */
+      imm = 1.0f;
+   }
+
+   src[0] = ureg_src(dst);
+   src[1] = ureg_imm1f(t->ureg, imm);
+   if (dstMod & GL_SATURATE_BIT_ATI) {
+      dst = ureg_saturate(dst);
+   }
+   ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2); /* dst = dst * imm, saturated if requested */
+}
+
+/**
+ * Compile the setup (sample/pass) instruction of register r to TGSI, if any.
+ */
+static void
+compile_setupinst(struct st_translate *t,
+                  const unsigned r,
+                  const struct atifs_setupinst *texinst)
+{
+   struct ureg_dst dst[1];
+   struct ureg_src src[2];
+
+   if (!texinst->Opcode) /* no setup instruction for this register */
+      return;
+
+   dst[0] = get_temp(t, r);
+
+   GLuint pass_tex = texinst->src;
+
+   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
+      unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;
+
+      src[0] = t->inputs[t->inputMapping[attr]];
+   } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
+      unsigned reg = pass_tex - GL_REG_0_ATI;
+
+      /* the frontend already validated that REG is only allowed in second pass */
+      if (t->regs_written[0][reg]) {
+         src[0] = ureg_src(t->temps[reg]);
+      } else { /* never written in the first pass: use zero as the coordinate */
+         src[0] = ureg_imm1f(t->ureg, 0.0f);
+      }
+   } /* NOTE(review): src[0] stays unset for any other source; presumably rejected by the frontend */
+   src[0] = apply_swizzle(t, src[0], texinst->swizzle);
+
+   if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
+      /* by default texture and sampler indexes are the same */
+      src[1] = t->samplers[r];
+      /* the texture target is still unknown, it will be fixed in the draw call */
+      ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D,
+                    NULL, 0, src, 2);
+   } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+      ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1); /* pass the coordinate through */
+   }
+
+   t->regs_written[t->current_pass][r] = true;
+}
+
+/**
+ * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
+ */
+static void
+compile_instruction(struct st_translate *t,
+                    const struct atifs_instruction *inst)
+{
+   unsigned optype;
+
+   for (optype = 0; optype < 2; optype++) { /* color, alpha */
+      const struct instruction_desc *desc;
+      struct ureg_dst dst[1];
+      struct ureg_src args[3]; /* arguments for the main operation */
+      unsigned arg;
+      unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
+
+      if (!inst->Opcode[optype])
+         continue;
+
+      desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI];
+
+      /* prepare the arguments */
+      for (arg = 0; arg < desc->arg_count; arg++) {
+         if (arg >= inst->ArgCount[optype]) {
+            _mesa_warning(0, "Using 0 for missing argument %d of %s\n",
+                          arg, desc->name);
+            args[arg] = ureg_imm1f(t->ureg, 0.0f);
+         } else {
+            args[arg] = prepare_argument(t, arg,
+                                         &inst->SrcReg[optype][arg]);
+         }
+      }
+
+      /* prepare dst */
+      dst[0] = get_temp(t, dstreg);
+
+      if (optype) {
+         dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W);
+      } else {
+         GLuint dstMask = inst->DstReg[optype].dstMask;
+         if (dstMask == GL_NONE) {
+            dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ);
+         } else {
+            dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */
+         }
+      }
+
+      /* emit the main instruction */
+      emit_arith_inst(t, desc, dst, args, arg);
+
+      emit_dstmod(t, *dst, inst->DstReg[optype].dstMod);
+
+      t->regs_written[t->current_pass][dstreg] = true;
+   }
+}
+
+static void
+finalize_shader(struct st_translate *t, unsigned numPasses)
+{
+   struct ureg_dst dst[1] = { { 0 } };
+   struct ureg_src src[1] = { { 0 } };
+
+   if (t->regs_written[numPasses-1][0]) {
+      /* register 0 was written in the last pass: copy it into the OUT slot */
+      dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]];
+      src[0] = ureg_src(t->temps[0]);
+      ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
+   }
+
+   /* signal the end of the program (END takes no dst/src operands) */
+   ureg_insn(t->ureg, TGSI_OPCODE_END, dst, 0, src, 0);
+}
+
+/**
+ * Called when a new variant is needed, we need to translate
+ * the ATI fragment shader to TGSI
+ */
+enum pipe_error
+st_translate_atifs_program(
+   struct ureg_program *ureg,
+   struct ati_fragment_shader *atifs,
+   struct gl_program *program,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[])
+{
+   enum pipe_error ret = PIPE_OK;
+
+   unsigned pass, i, r;
+
+   struct st_translate translate, *t;
+   t = &translate;
+   memset(t, 0, sizeof *t);
+
+   t->inputMapping = inputMapping;
+   t->outputMapping = outputMapping;
+   t->ureg = ureg;
+   t->atifs = atifs;
+
+   /*
+    * Declare input attributes.
+    */
+   for (i = 0; i < numInputs; i++) {
+      t->inputs[i] = ureg_DECL_fs_input(ureg,
+                                        inputSemanticName[i],
+                                        inputSemanticIndex[i],
+                                        interpMode[i]);
+   }
+
+   /*
+    * Declare output attributes:
+    *  we always have numOutputs=1 and it's FRAG_RESULT_COLOR
+    */
+   t->outputs[0] = ureg_DECL_output(ureg,
+                                    TGSI_SEMANTIC_COLOR,
+                                    outputSemanticIndex[0]);
+
+   /* Emit constants and immediates.  Mesa uses a single index space
+    * for these, so we put all the translated regs in t->constants.
+    */
+   if (program->Parameters) {
+      t->constants = calloc(program->Parameters->NumParameters,
+                            sizeof t->constants[0]);
+      if (t->constants == NULL) {
+         ret = PIPE_ERROR_OUT_OF_MEMORY;
+         goto out;
+      }
+
+      for (i = 0; i < program->Parameters->NumParameters; i++) {
+         switch (program->Parameters->Parameters[i].Type) {
+         case PROGRAM_STATE_VAR:
+         case PROGRAM_UNIFORM:
+            t->constants[i] = ureg_DECL_constant(ureg, i);
+            break;
+         case PROGRAM_CONSTANT:
+            t->constants[i] =
+               ureg_DECL_immediate(ureg,
+                                   (const float*)program->Parameters->ParameterValues[i],
+                                   4);
+            break;
+         default:
+            break;
+         }
+      }
+   }
+
+   /* texture samplers */
+   for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) {
+      if (program->SamplersUsed & (1 << i)) {
+         t->samplers[i] = ureg_DECL_sampler(ureg, i);
+         /* the texture target is still unknown, it will be fixed in the draw call */
+         ureg_DECL_sampler_view(ureg, i, TGSI_TEXTURE_2D,
+                                TGSI_RETURN_TYPE_FLOAT,
+                                TGSI_RETURN_TYPE_FLOAT,
+                                TGSI_RETURN_TYPE_FLOAT,
+                                TGSI_RETURN_TYPE_FLOAT);
+      }
+   }
+
+   /* emit instructions */
+   for (pass = 0; pass < atifs->NumPasses; pass++) {
+      t->current_pass = pass;
+      for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
+         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
+         compile_setupinst(t, r, texinst);
+      }
+      for (i = 0; i < atifs->numArithInstr[pass]; i++) {
+         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
+         compile_instruction(t, inst);
+      }
+   }
+
+   finalize_shader(t, atifs->NumPasses);
+
+out:
+   free(t->constants);
+
+   if (t->error) {
+      debug_printf("%s: translate error flag set\n", __func__);
+   }
+
+   return ret;
+}
+
+/**
+ * Called in ProgramStringNotify: fill in the metadata (inputs read, samplers,
+ * parameters) of the gl_program attached to the ati_fragment_shader.
+ */
+void
+st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog)
+{
+   /* we know this is st_fragment_program, because of st_new_ati_fs() */
+   struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
+   struct ati_fragment_shader *atifs = stfp->ati_fs;
+
+   unsigned pass, i, r, optype, arg;
+
+   static const gl_state_index fog_params_state[STATE_LENGTH] =
+      {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0};
+   static const gl_state_index fog_color[STATE_LENGTH] =
+      {STATE_FOG_COLOR, 0, 0, 0, 0};
+
+   prog->InputsRead = 0;
+   prog->OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR);
+   prog->SamplersUsed = 0;
+   prog->Parameters = _mesa_new_parameter_list();
+
+   /* fill in InputsRead, SamplersUsed, TexturesUsed */
+   for (pass = 0; pass < atifs->NumPasses; pass++) {
+      for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
+         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
+         GLuint pass_tex = texinst->src;
+
+         /* only GL_TEXTUREn_ARB sources name a texcoord; pass 2 may use GL_REG_n_ATI */
+         if ((texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP ||
+              texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) &&
+             pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
+            prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
+         }
+         if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
+            /* by default there is 1:1 mapping between samplers and textures */
+            prog->SamplersUsed |= (1 << r);
+            /* the target is unknown here, it will be fixed in the draw call */
+            prog->TexturesUsed[r] = TEXTURE_2D_BIT;
+         }
+      }
+   }
+   for (pass = 0; pass < atifs->NumPasses; pass++) {
+      for (i = 0; i < atifs->numArithInstr[pass]; i++) {
+         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
+
+         for (optype = 0; optype < 2; optype++) { /* color, alpha */
+            if (inst->Opcode[optype]) {
+               for (arg = 0; arg < inst->ArgCount[optype]; arg++) {
+                  GLint index = inst->SrcReg[optype][arg].Index;
+                  if (index == GL_PRIMARY_COLOR_EXT) {
+                     prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL0);
+                  } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
+                     /* note: ATI_fragment_shader.txt never specifies what
+                      * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
+                      * VARYING_SLOT_COL1 for this input */
+                     prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_COL1);
+                  }
+               }
+            }
+         }
+      }
+   }
+   /* we may need fog */
+   prog->InputsRead |= BITFIELD64_BIT(VARYING_SLOT_FOGC);
+
+   /* we always have the ATI_fs constants, and the fog params */
+   for (i = 0; i < MAX_NUM_FRAGMENT_CONSTANTS_ATI; i++) {
+      _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM,
+                          NULL, 4, GL_FLOAT, NULL, NULL);
+   }
+   _mesa_add_state_reference(prog->Parameters, fog_params_state);
+   _mesa_add_state_reference(prog->Parameters, fog_color);
+
+   prog->NumInstructions = 0;
+   prog->NumTemporaries = MAX_NUM_FRAGMENT_REGISTERS_ATI + 3; /* 3 input temps for arith ops */
+   prog->NumParameters = MAX_NUM_FRAGMENT_CONSTANTS_ATI + 2; /* 2 state variables for fog */
+}
+
+
+struct tgsi_atifs_transform {
+   struct tgsi_transform_context base; /* first member: tgsi_atifs_transform() casts it back */
+   struct tgsi_shader_info info; /* filled by tgsi_scan_shader() on the incoming tokens */
+   const struct st_fp_variant_key *key; /* supplies texture_targets[] and the fog mode */
+   bool first_instruction_emitted; /* set when the first instruction is seen */
+   unsigned fog_factor_temp; /* index of the temporary holding the fog factor */
+   unsigned fog_clamp_imm; /* index of the (1, 0, 0, 0) immediate used by CLAMP */
+};
+
+static inline struct tgsi_atifs_transform *
+tgsi_atifs_transform(struct tgsi_transform_context *tctx)
+{
+   return (struct tgsi_atifs_transform *)tctx;
+}
+
+/* copied from st_cb_drawpixels_shader.c */
+static void
+set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index,
+        unsigned x, unsigned y, unsigned z, unsigned w)
+{
+   inst->Src[i].Register.File  = file;
+   inst->Src[i].Register.Index = index;
+   inst->Src[i].Register.SwizzleX = x;
+   inst->Src[i].Register.SwizzleY = y;
+   inst->Src[i].Register.SwizzleZ = z;
+   inst->Src[i].Register.SwizzleW = w;
+}
+
+#define SET_SRC(inst, i, file, index, x, y, z, w) \
+   set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \
+           TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w)
+
+static void
+transform_decl(struct tgsi_transform_context *tctx,
+               struct tgsi_full_declaration *decl)
+{
+   struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
+
+   if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
+      /* fix texture target: take the real one for this view from the variant key */
+      unsigned newtarget = ctx->key->texture_targets[decl->Range.First];
+      if (newtarget) /* a zero entry leaves the declared target untouched */
+         decl->SamplerView.Resource = newtarget;
+   }
+
+   tctx->emit_declaration(tctx, decl); /* every declaration is passed on, patched or not */
+}
+
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+                struct tgsi_full_instruction *current_inst)
+{
+   struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
+
+   if (ctx->first_instruction_emitted)
+      goto transform_inst;
+
+   ctx->first_instruction_emitted = true;
+
+   if (ctx->key->fog) {
+      /* add a new temp for the fog factor */
+      ctx->fog_factor_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
+      tgsi_transform_temp_decl(tctx, ctx->fog_factor_temp);
+
+      /* add immediates for clamp */
+      ctx->fog_clamp_imm = ctx->info.immediate_count;
+      tgsi_transform_immediate_decl(tctx, 1.0f, 0.0f, 0.0f, 0.0f);
+   }
+
+transform_inst:
+   if (current_inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
+      /* fix texture target */
+      unsigned newtarget = ctx->key->texture_targets[current_inst->Src[1].Register.Index];
+      if (newtarget)
+         current_inst->Texture.Texture = newtarget;
+
+   } else if (ctx->key->fog && current_inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
+              current_inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+      struct tgsi_full_instruction inst;
+      unsigned i;
+      int fogc_index = -1;
+
+      /* find FOGC input */
+      for (i = 0; i < ctx->info.num_inputs; i++) {
+         if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FOG) {
+            fogc_index = i;
+            break;
+         }
+      }
+      if (fogc_index < 0) {
+         /* should never be reached, because fog coord input is always declared */
+         tctx->emit_instruction(tctx, current_inst);
+         return;
+      }
+
+      /* compute the 1 component fog factor f */
+      if (ctx->key->fog == 1) {
+         /* LINEAR formula: f = (end - z) / (end - start)
+          * with optimized parameters:
+          *    f = MAD(fogcoord, oparams.x, oparams.y)
+          */
+         inst = tgsi_default_full_instruction();
+         inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+         inst.Instruction.NumDstRegs = 1;
+         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
+         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+         inst.Instruction.NumSrcRegs = 3;
+         SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
+         SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, X, X, X, X);
+         SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Y, Y, Y, Y);
+         tctx->emit_instruction(tctx, &inst);
+      } else if (ctx->key->fog == 2) {
+         /* EXP formula: f = exp(-dens * z)
+          * with optimized parameters:
+          *    f = MUL(fogcoord, oparams.z); f= EX2(-f)
+          */
+         inst = tgsi_default_full_instruction();
+         inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+         inst.Instruction.NumDstRegs = 1;
+         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
+         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+         inst.Instruction.NumSrcRegs = 2;
+         SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
+         SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Z, Z, Z, Z);
+         tctx->emit_instruction(tctx, &inst);
+
+         inst = tgsi_default_full_instruction();
+         inst.Instruction.Opcode = TGSI_OPCODE_EX2;
+         inst.Instruction.NumDstRegs = 1;
+         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
+         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+         inst.Instruction.NumSrcRegs = 1;
+         SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
+         inst.Src[0].Register.Negate = 1;
+         tctx->emit_instruction(tctx, &inst);
+      } else if (ctx->key->fog == 3) {
+         /* EXP2 formula: f = exp(-(dens * z)^2)
+          * with optimized parameters:
+          *    f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
+          */
+         inst = tgsi_default_full_instruction();
+         inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+         inst.Instruction.NumDstRegs = 1;
+         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
+         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+         inst.Instruction.NumSrcRegs = 2;
+         SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
+         SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, W, W, W, W);
+         tctx->emit_instruction(tctx, &inst);
+
+         inst = tgsi_default_full_instruction();
+         inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+         inst.Instruction.NumDstRegs = 1;
+         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
+         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+         inst.Instruction.NumSrcRegs = 2;
+         SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
+         SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
+         tctx->emit_instruction(tctx, &inst);
+
+         inst = tgsi_default_full_instruction();
+         inst.Instruction.Opcode = TGSI_OPCODE_EX2;
+         inst.Instruction.NumDstRegs = 1;
+         inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+         inst.Dst[0].Register.Index = ctx->fog_factor_temp;
+         inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+         inst.Instruction.NumSrcRegs = 1;
+         SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
+         inst.Src[0].Register.Negate ^= 1;
+         tctx->emit_instruction(tctx, &inst);
+      }
+      /* f = CLAMP(f, 0.0, 1.0) */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
+      inst.Instruction.NumDstRegs = 1;
+      inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+      inst.Dst[0].Register.Index = ctx->fog_factor_temp;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+      inst.Instruction.NumSrcRegs = 3;
+      SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
+      SET_SRC(&inst, 1, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, Y, Y, Y, Y); // 0.0
+      SET_SRC(&inst, 2, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, X, X, X, X); // 1.0
+      tctx->emit_instruction(tctx, &inst);
+
+      /* REG0 = LRP(f, REG0, fogcolor) */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_LRP;
+      inst.Instruction.NumDstRegs = 1;
+      inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
+      inst.Dst[0].Register.Index = 0;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+      inst.Instruction.NumSrcRegs = 3;
+      SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, X, X, Y);
+      SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, 0, X, Y, Z, W);
+      SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI + 1, X, Y, Z, W);
+      tctx->emit_instruction(tctx, &inst);
+   }
+
+   tctx->emit_instruction(tctx, current_inst);
+}
+
+/*
+ * Post-process step in the draw call: returns newly allocated tokens with the
+ * texture targets fixed and fog code inserted per key, or NULL if allocation fails.
+ */
+const struct tgsi_token *
+st_fixup_atifs(const struct tgsi_token *tokens,
+               const struct st_fp_variant_key *key)
+{
+   struct tgsi_atifs_transform ctx;
+   struct tgsi_token *newtoks;
+   int newlen;
+
+   memset(&ctx, 0, sizeof(ctx));
+   ctx.base.transform_declaration = transform_decl;
+   ctx.base.transform_instruction = transform_instr;
+   ctx.key = key;
+   tgsi_scan_shader(tokens, &ctx.info); /* transform_instr reads file_max, immediate_count and inputs */
+
+   newlen = tgsi_num_tokens(tokens) + 30; /* headroom for the added fog code; NOTE(review): confirm 30 suffices */
+   newtoks = tgsi_alloc_tokens(newlen);
+   if (!newtoks)
+      return NULL;
+
+   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); /* NOTE(review): result is not checked */
+   return newtoks;
+}
+
diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.h b/src/mesa/state_tracker/st_atifs_to_tgsi.h
new file mode 100644
index 0000000..c1b6758
--- /dev/null
+++ b/src/mesa/state_tracker/st_atifs_to_tgsi.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 Miklós Máté
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef ST_ATIFS_TO_TGSI_H
+#define ST_ATIFS_TO_TGSI_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#include "main/glheader.h"
+#include "pipe/p_defines.h"
+
+struct gl_context;
+struct gl_program;
+struct ureg_program;
+struct tgsi_token;
+struct ati_fragment_shader;
+struct st_fp_variant_key;
+
+enum pipe_error
+st_translate_atifs_program(
+    struct ureg_program *ureg,
+    struct ati_fragment_shader *atifs,
+    struct gl_program *program,
+    GLuint numInputs,
+    const GLuint inputMapping[],
+    const ubyte inputSemanticName[],
+    const ubyte inputSemanticIndex[],
+    const GLuint interpMode[],
+    GLuint numOutputs,
+    const GLuint outputMapping[],
+    const ubyte outputSemanticName[],
+    const ubyte outputSemanticIndex[]);
+
+
+void
+st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog);
+
+const struct tgsi_token *
+st_fixup_atifs(const struct tgsi_token *tokens,
+               const struct st_fp_variant_key *key);
+
+#if defined __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* ST_ATIFS_TO_TGSI_H */
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 407dfd3..4d9b344 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -64,6 +64,21 @@ void st_upload_constants( struct st_context *st,
           shader_type == PIPE_SHADER_TESS_EVAL ||
           shader_type == PIPE_SHADER_COMPUTE);
 
+   /* update the ATI constants before rendering */
+   struct ati_fragment_shader *ati_fs = st->fp->ati_fs;
+   if (shader_type == PIPE_SHADER_FRAGMENT && ati_fs) {
+      unsigned c;
+
+      for (c = 0; c < MAX_NUM_FRAGMENT_CONSTANTS_ATI; c++) {
+         if (ati_fs->LocalConstDef & (1 << c))
+            memcpy(params->ParameterValues[c],
+                   ati_fs->Constants[c], sizeof(GLfloat) * 4);
+         else
+            memcpy(params->ParameterValues[c],
+                   st->ctx->ATIFragmentShader.GlobalConstants[c], sizeof(GLfloat) * 4);
+      }
+   }
+
    /* update constants */
    if (params && params->NumParameters) {
       struct pipe_constant_buffer cb;
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index ff90bd6..82997c1 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -38,18 +38,69 @@
 #include "main/imports.h"
 #include "main/mtypes.h"
 #include "main/framebuffer.h"
+#include "main/texobj.h"
+#include "main/texstate.h"
 #include "program/program.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_simple_shaders.h"
 #include "cso_cache/cso_context.h"
+#include "util/u_debug.h"
 
 #include "st_context.h"
 #include "st_atom.h"
 #include "st_program.h"
 
 
+/**
+ * Compress the fog function enums into a 2-bit value:
+ * GL_LINEAR -> 1, GL_EXP -> 2, GL_EXP2 -> 3, any other mode -> 0.
+ */
+static GLuint
+translate_fog_mode(GLenum mode)
+{
+   if (mode == GL_LINEAR)
+      return 1;
+   if (mode == GL_EXP)
+      return 2;
+   return mode == GL_EXP2 ? 3 : 0;
+}
+
+/*
+ * Return the TGSI texture target for the texture object currently bound to
+ * texture unit 'unit'.  A unit without a current texture is treated as 2D.
+ * An index not listed below trips debug_assert() and falls back to 1D.
+ */
+static unsigned
+get_texture_target(struct gl_context *ctx, const unsigned unit)
+{
+   struct gl_texture_object *bound = _mesa_get_tex_unit(ctx, unit)->_Current;
+   /* fallback for missing texture: TEXTURE_2D_INDEX */
+   const gl_texture_index idx =
+      bound ? _mesa_tex_target_to_index(ctx, bound->Target) : TEXTURE_2D_INDEX;
+
+   /* Mesa texture index -> TGSI texture target, copied from
+    * st_mesa_to_tgsi.c with the shadow part omitted. */
+   switch (idx) {
+   case TEXTURE_1D_INDEX:                   return TGSI_TEXTURE_1D;
+   case TEXTURE_2D_INDEX:                   return TGSI_TEXTURE_2D;
+   case TEXTURE_3D_INDEX:                   return TGSI_TEXTURE_3D;
+   case TEXTURE_CUBE_INDEX:                 return TGSI_TEXTURE_CUBE;
+   case TEXTURE_RECT_INDEX:                 return TGSI_TEXTURE_RECT;
+   case TEXTURE_1D_ARRAY_INDEX:             return TGSI_TEXTURE_1D_ARRAY;
+   case TEXTURE_2D_ARRAY_INDEX:             return TGSI_TEXTURE_2D_ARRAY;
+   case TEXTURE_CUBE_ARRAY_INDEX:           return TGSI_TEXTURE_CUBE_ARRAY;
+   case TEXTURE_BUFFER_INDEX:               return TGSI_TEXTURE_BUFFER;
+   case TEXTURE_EXTERNAL_INDEX:             return TGSI_TEXTURE_2D;
+   case TEXTURE_2D_MULTISAMPLE_INDEX:       return TGSI_TEXTURE_2D_MSAA;
+   case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY_MSAA;
+   default:
+      debug_assert(0);
+      return TGSI_TEXTURE_1D;
+   }
+}
+
+
 /**
  * Update fragment program state/atom.  This involves translating the
  * Mesa fragment program into a gallium fragment program and binding it.
@@ -79,6 +130,18 @@ update_fp( struct st_context *st )
       st->ctx->Multisample.MinSampleShadingValue *
       _mesa_geometric_samples(st->ctx->DrawBuffer) > 1;
 
+   if (stfp->ati_fs) {
+      unsigned u;
+
+      if (st->ctx->Fog.Enabled) {
+         key.fog = translate_fog_mode(st->ctx->Fog.Mode);
+      }
+
+      for (u = 0; u < MAX_NUM_FRAGMENT_REGISTERS_ATI; u++) {
+         key.texture_targets[u] = get_texture_target(st->ctx, u);
+      }
+   }
+
    st->fp_variant = st_get_fp_variant(st, stfp, &key);
 
    st_reference_fragprog(st, &st->fp, stfp);
@@ -91,7 +154,7 @@ update_fp( struct st_context *st )
 const struct st_tracked_state st_update_fp = {
    "st_update_fp",					/* name */
    {							/* dirty */
-      _NEW_BUFFERS | _NEW_MULTISAMPLE,			/* mesa */
+      _NEW_BUFFERS | _NEW_MULTISAMPLE | _NEW_FOG,	/* mesa */
       ST_NEW_FRAGMENT_PROGRAM                           /* st */
    },
    update_fp  					/* update */
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 09f4d8e..01ed544 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -1302,6 +1302,7 @@ blit_copy_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
        !ctx->FragmentProgram.Enabled &&
        !ctx->VertexProgram.Enabled &&
        !ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] &&
+       !ctx->ATIFragmentShader._Enabled &&
        ctx->DrawBuffer->_NumColorDrawBuffers == 1 &&
        !ctx->Query.CondRenderQuery &&
        !ctx->Query.CurrentOcclusionObject) {
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 27cc0f3..d79cfe2 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -46,6 +46,7 @@
 #include "st_mesa_to_tgsi.h"
 #include "st_cb_program.h"
 #include "st_glsl_to_tgsi.h"
+#include "st_atifs_to_tgsi.h"
 
 
 
@@ -302,6 +303,22 @@ st_program_string_notify( struct gl_context *ctx,
       if (st->cp == stcp)
          st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM;
    }
+   else if (target == GL_FRAGMENT_SHADER_ATI) {
+      assert(prog);
+
+      struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
+      assert(stfp->ati_fs);
+      assert(stfp->ati_fs->Program == prog);
+
+      st_init_atifs_prog(ctx, prog);
+
+      st_release_fp_variants(st, stfp);
+      if (!st_translate_fragment_program(st, stfp))
+         return false;
+
+      if (st->fp == stfp)
+         st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
+   }
 
    if (ST_DEBUG & DEBUG_PRECOMPILE ||
        st->shader_has_one_variant[stage])
@@ -310,6 +327,19 @@ st_program_string_notify( struct gl_context *ctx,
    return GL_TRUE;
 }
 
+/**
+ * Called via ctx->Driver.NewATIfs() in glEndFragmentShaderATI(): creates the
+ * fragment program for curProg.  Returns NewProgram()'s result, maybe NULL.
+ */
+static struct gl_program *
+st_new_ati_fs(struct gl_context *ctx, struct ati_fragment_shader *curProg)
+{
+   struct gl_program *prog =
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, curProg->Id);
+   if (prog)   /* guard: never write ati_fs through a NULL program */
+      ((struct st_fragment_program *) prog)->ati_fs = curProg;
+   return prog;
+}
 
 /**
  * Plug in the program and shader-related device driver functions.
@@ -322,6 +352,7 @@ st_init_program_functions(struct dd_function_table *functions)
    functions->NewProgram = st_new_program;
    functions->DeleteProgram = st_delete_program;
    functions->ProgramStringNotify = st_program_string_notify;
+   functions->NewATIfs = st_new_ati_fs;
    
    functions->LinkShader = st_link_shader;
 }
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 80dcfd8..94dc489 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -53,6 +53,7 @@
 #include "st_context.h"
 #include "st_program.h"
 #include "st_mesa_to_tgsi.h"
+#include "st_atifs_to_tgsi.h"
 #include "cso_cache/cso_context.h"
 
 
@@ -811,7 +812,22 @@ st_translate_fragment_program(struct st_context *st,
 
       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
       stfp->glsl_to_tgsi = NULL;
-   } else
+   } else if (stfp->ati_fs)
+      st_translate_atifs_program(ureg,
+                                 stfp->ati_fs,
+                                 &stfp->Base.Base,
+                                 /* inputs */
+                                 fs_num_inputs,
+                                 inputMapping,
+                                 input_semantic_name,
+                                 input_semantic_index,
+                                 interpMode,
+                                 /* outputs */
+                                 fs_num_outputs,
+                                 outputMapping,
+                                 fs_output_semantic_name,
+                                 fs_output_semantic_index);
+   else
       st_translate_mesa_program(st->ctx,
                                 TGSI_PROCESSOR_FRAGMENT,
                                 ureg,
@@ -849,6 +865,16 @@ st_create_fp_variant(struct st_context *st,
 
    assert(!(key->bitmap && key->drawpixels));
 
+   /* Fix texture targets and add fog for ATI_fs */
+   if (stfp->ati_fs) {
+      const struct tgsi_token *tokens = st_fixup_atifs(tgsi.tokens, key);
+
+      if (tokens)
+         tgsi.tokens = tokens;
+      else
+         fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
+   }
+
    /* Emulate features. */
    if (key->clamp_color || key->persample_shading) {
       const struct tgsi_token *tokens;
@@ -858,9 +884,11 @@ st_create_fp_variant(struct st_context *st,
 
       tokens = tgsi_emulate(tgsi.tokens, flags);
 
-      if (tokens)
+      if (tokens) {
+         if (tgsi.tokens != stfp->tgsi.tokens)
+            tgsi_free_tokens(tgsi.tokens);
          tgsi.tokens = tokens;
-      else
+      } else
          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
    }
 
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 028fba9..7c90fd7 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -35,6 +35,7 @@
 #define ST_PROGRAM_H
 
 #include "main/mtypes.h"
+#include "main/atifragshader.h"
 #include "program/program.h"
 #include "pipe/p_state.h"
 #include "st_context.h"
@@ -65,6 +66,12 @@ struct st_fp_variant_key
 
    /** for ARB_sample_shading */
    GLuint persample_shading:1;
+
+   /** needed for ATI_fragment_shader */
+   GLuint fog:2;
+
+   /** needed for ATI_fragment_shader */
+   char texture_targets[MAX_NUM_FRAGMENT_REGISTERS_ATI];
 };
 
 
@@ -99,6 +106,7 @@ struct st_fragment_program
    struct gl_fragment_program Base;
    struct pipe_shader_state tgsi;
    struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+   struct ati_fragment_shader *ati_fs;
 
    struct st_fp_variant *variants;
 };
-- 
2.8.0.rc3



More information about the mesa-dev mailing list