[Mesa-dev] [PATCH 2/7] st/mesa: implement GL_ATI_fragment_shader

Marek Olšák maraeo at gmail.com
Thu Feb 25 10:40:30 UTC 2016


On Thu, Feb 25, 2016 at 12:35 AM, Miklós Máté <mtmkls at gmail.com> wrote:
> v2: fix arithmetic for special opcodes,
>  fix fog state, cleanup
> v3: simplify handling of special opcodes,
>  fix rebinding with different textargets or fog equation,
>  lots of formatting fixes
>
> Signed-off-by: Miklós Máté <mtmkls at gmail.com>
> ---
>  src/mesa/Makefile.sources                 |   1 +
>  src/mesa/main/atifragshader.h             |   1 +
>  src/mesa/main/texstate.c                  |  18 +
>  src/mesa/main/texstate.h                  |   3 +
>  src/mesa/program/program.h                |   2 +
>  src/mesa/state_tracker/st_atifs_to_tgsi.c | 726 ++++++++++++++++++++++++++++++
>  src/mesa/state_tracker/st_atifs_to_tgsi.h |  65 +++
>  src/mesa/state_tracker/st_atom_constbuf.c |  16 +
>  src/mesa/state_tracker/st_atom_shader.c   |  27 +-
>  src/mesa/state_tracker/st_cb_drawpixels.c |   1 +
>  src/mesa/state_tracker/st_cb_program.c    |  36 +-
>  src/mesa/state_tracker/st_program.c       |  30 +-
>  src/mesa/state_tracker/st_program.h       |   7 +
>  13 files changed, 930 insertions(+), 3 deletions(-)
>  create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.c
>  create mode 100644 src/mesa/state_tracker/st_atifs_to_tgsi.h
>
> diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
> index a6c12c6..54601a9 100644
> --- a/src/mesa/Makefile.sources
> +++ b/src/mesa/Makefile.sources
> @@ -395,6 +395,7 @@ VBO_FILES = \
>         vbo/vbo_split_inplace.c
>
>  STATETRACKER_FILES = \
> +       state_tracker/st_atifs_to_tgsi.c \
>         state_tracker/st_atom_array.c \
>         state_tracker/st_atom_atomicbuf.c \
>         state_tracker/st_atom_blend.c \
> diff --git a/src/mesa/main/atifragshader.h b/src/mesa/main/atifragshader.h
> index 5901134..0e32795 100644
> --- a/src/mesa/main/atifragshader.h
> +++ b/src/mesa/main/atifragshader.h
> @@ -16,6 +16,7 @@ struct gl_context;
>  #define MAX_NUM_INSTRUCTIONS_PER_PASS_ATI 8
>  #define MAX_NUM_PASSES_ATI                2
>  #define MAX_NUM_FRAGMENT_REGISTERS_ATI    6
> +#define MAX_NUM_FRAGMENT_CONSTANTS_ATI    8
>
>  struct ati_fs_opcode_st
>  {
> diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
> index 9ee5c69..1f7aa4a 100644
> --- a/src/mesa/main/texstate.c
> +++ b/src/mesa/main/texstate.c
> @@ -57,6 +57,24 @@ static const struct gl_tex_env_combine_state default_combine_state = {
>  };
>
>
> +/**
> + * Return the currently active target index of the given texture unit,
> + * fall back to 2D if nothing is current.
> + */
> +gl_texture_index
> +_mesa_get_texture_target_index(struct gl_context *ctx, const unsigned unit)
> +{
> +   struct gl_texture_object *texObj = _mesa_get_tex_unit(ctx, unit)->_Current;
> +
> +   if (texObj) {
> +      return _mesa_tex_target_to_index(ctx, texObj->Target);
> +   } else {
> +      /* fallback for missing texture */
> +      return TEXTURE_2D_INDEX;
> +   }
> +
> +}
> +
>
>  /**
>   * Used by glXCopyContext to copy texture state from one context to another.
> diff --git a/src/mesa/main/texstate.h b/src/mesa/main/texstate.h
> index 52fe602..f7100bf 100644
> --- a/src/mesa/main/texstate.h
> +++ b/src/mesa/main/texstate.h
> @@ -63,6 +63,9 @@ _mesa_max_tex_unit(struct gl_context *ctx)
>                 ctx->Const.MaxTextureCoordUnits);
>  }
>
> +gl_texture_index
> +_mesa_get_texture_target_index(struct gl_context *ctx, const unsigned unit);
> +
>
>  extern void
>  _mesa_copy_texture_state( const struct gl_context *src, struct gl_context *dst );
> diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
> index 24e0597..09e6928 100644
> --- a/src/mesa/program/program.h
> +++ b/src/mesa/program/program.h
> @@ -172,6 +172,8 @@ _mesa_program_enum_to_shader_stage(GLenum v)
>        return MESA_SHADER_VERTEX;
>     case GL_FRAGMENT_PROGRAM_ARB:
>        return MESA_SHADER_FRAGMENT;
> +   case GL_FRAGMENT_SHADER_ATI:
> +      return MESA_SHADER_FRAGMENT;
>     case GL_GEOMETRY_PROGRAM_NV:
>        return MESA_SHADER_GEOMETRY;
>     case GL_TESS_CONTROL_PROGRAM_NV:
> diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c
> new file mode 100644
> index 0000000..6c2d2f2
> --- /dev/null
> +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c
> @@ -0,0 +1,726 @@
> +/*
> + * Copyright (C) 2016 Miklós Máté
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include "main/mtypes.h"
> +#include "main/atifragshader.h"
> +#include "main/texstate.h"
> +#include "main/errors.h"
> +#include "program/prog_parameter.h"
> +
> +#include "tgsi/tgsi_transform.h"
> +#include "tgsi/tgsi_ureg.h"
> +#include "util/u_math.h"
> +
> +#include "st_program.h"
> +#include "st_atifs_to_tgsi.h"
> +
> +/**
> + * Intermediate state used during shader translation.
> + */
> +struct st_translate {
> +   struct ureg_program *ureg;
> +   struct gl_context *ctx;
> +   struct ati_fragment_shader *atifs;
> +
> +   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
> +   struct ureg_src *constants;
> +   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
> +   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
> +   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
> +
> +   const GLuint *inputMapping;
> +   const GLuint *outputMapping;
> +
> +   unsigned current_pass;
> +
> +   bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI];
> +
> +   boolean error;
> +};
> +
> +struct instruction_desc {
> +   unsigned TGSI_opcode;
> +   const char *name;
> +   unsigned char arg_count;
> +};
> +
> +static const struct instruction_desc inst_desc[] = {
> +   {TGSI_OPCODE_MOV, "MOV", 1},
> +   {TGSI_OPCODE_NOP, "UND", 0}, /* unused */
> +   {TGSI_OPCODE_ADD, "ADD", 2},
> +   {TGSI_OPCODE_MUL, "MUL", 2},
> +   {TGSI_OPCODE_SUB, "SUB", 2},
> +   {TGSI_OPCODE_DP3, "DOT3", 2},
> +   {TGSI_OPCODE_DP4, "DOT4", 2},
> +   {TGSI_OPCODE_MAD, "MAD", 3},
> +   {TGSI_OPCODE_LRP, "LERP", 3},
> +   {TGSI_OPCODE_NOP, "CND", 3},
> +   {TGSI_OPCODE_NOP, "CND0", 3},
> +   {TGSI_OPCODE_NOP, "DOT2_ADD", 3}
> +};
> +
> +static struct ureg_dst
> +get_temp(struct st_translate *t, unsigned index)
> +{
> +   if (ureg_dst_is_undef(t->temps[index]))
> +      t->temps[index] = ureg_DECL_temporary(t->ureg);
> +   return t->temps[index];
> +}
> +
> +static struct ureg_src
> +apply_swizzle(struct st_translate *t,
> +              struct ureg_src src, GLuint swizzle)
> +{
> +   if (swizzle == GL_SWIZZLE_STR_ATI) {
> +      return src;
> +   } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
> +      return ureg_swizzle(src,
> +                          TGSI_SWIZZLE_X,
> +                          TGSI_SWIZZLE_Y,
> +                          TGSI_SWIZZLE_W,
> +                          TGSI_SWIZZLE_Z);
> +   } else {
> +      struct ureg_dst tmp[2];
> +      struct ureg_src imm[3];
> +
> +      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
> +      tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI+1);
> +      imm[0] = src;
> +      imm[1] = ureg_imm4f(t->ureg, 1.0, 1.0, 0.0, 0.0);
> +      imm[2] = ureg_imm4f(t->ureg, 0.0, 0.0, 1.0, 1.0);
> +      ureg_insn(t->ureg, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3);
> +
> +      if (swizzle == GL_SWIZZLE_STR_DR_ATI) {
> +         imm[0] = ureg_scalar(src, TGSI_SWIZZLE_Z);
> +      } else {
> +         imm[0] = ureg_scalar(src, TGSI_SWIZZLE_W);
> +      }
> +      ureg_insn(t->ureg, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1);
> +
> +      imm[0] = ureg_src(tmp[0]);
> +      imm[1] = ureg_src(tmp[1]);
> +      ureg_insn(t->ureg, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2);
> +
> +      return ureg_src(tmp[0]);
> +   }
> +}
> +
> +/**
> + * Map mesa texture target to TGSI texture target.
> + * Copied from st_mesa_to_tgsi.c
> + */
> +static unsigned
> +translate_texture_target(GLuint textarget)
> +{
> +   /* the shadow part is omitted */
> +   switch( textarget ) {
> +   case TEXTURE_2D_MULTISAMPLE_INDEX: return TGSI_TEXTURE_2D_MSAA;
> +   case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY_MSAA;
> +   case TEXTURE_BUFFER_INDEX: return TGSI_TEXTURE_BUFFER;
> +   case TEXTURE_1D_INDEX:   return TGSI_TEXTURE_1D;
> +   case TEXTURE_2D_INDEX:   return TGSI_TEXTURE_2D;
> +   case TEXTURE_3D_INDEX:   return TGSI_TEXTURE_3D;
> +   case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
> +   case TEXTURE_CUBE_ARRAY_INDEX: return TGSI_TEXTURE_CUBE_ARRAY;
> +   case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
> +   case TEXTURE_1D_ARRAY_INDEX:   return TGSI_TEXTURE_1D_ARRAY;
> +   case TEXTURE_2D_ARRAY_INDEX:   return TGSI_TEXTURE_2D_ARRAY;
> +   case TEXTURE_EXTERNAL_INDEX:   return TGSI_TEXTURE_2D;
> +   default:
> +      debug_assert( 0 );
> +      return TGSI_TEXTURE_1D;
> +   }
> +}
> +
> +static struct ureg_src
> +get_source(struct st_translate *t, GLuint src_type)
> +{
> +   if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
> +      if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) {
> +         return ureg_src(get_temp(t, src_type - GL_REG_0_ATI));
> +      } else {
> +         return ureg_imm1f(t->ureg, 0.0);
> +      }
> +   } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
> +      return t->constants[src_type - GL_CON_0_ATI];
> +   } else if (src_type == GL_ZERO) {
> +      return ureg_imm1f(t->ureg, 0.0);
> +   } else if (src_type == GL_ONE) {
> +      return ureg_imm1f(t->ureg, 1.0);
> +   } else if (src_type == GL_PRIMARY_COLOR_ARB) {
> +      return t->inputs[t->inputMapping[VARYING_SLOT_COL0]];
> +   } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
> +      return t->inputs[t->inputMapping[VARYING_SLOT_COL1]];
> +   } else {
> +      /* frontend prevents this */
> +      unreachable("unknown source");
> +   }
> +}
> +
> +static struct ureg_src
> +prepare_argument(struct st_translate *t, const unsigned argId,
> +                 const struct atifragshader_src_register *srcReg)
> +{
> +   struct ureg_src src = get_source(t, srcReg->Index);
> +   struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId);
> +
> +   switch (srcReg->argRep) {
> +   case GL_NONE:
> +      break;
> +   case GL_RED:
> +      src = ureg_scalar(src, TGSI_SWIZZLE_X);
> +      break;
> +   case GL_GREEN:
> +      src = ureg_scalar(src, TGSI_SWIZZLE_Y);
> +      break;
> +   case GL_BLUE:
> +      src = ureg_scalar(src, TGSI_SWIZZLE_Z);
> +      break;
> +   case GL_ALPHA:
> +      src = ureg_scalar(src, TGSI_SWIZZLE_W);
> +      break;
> +   }
> +   ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1);
> +
> +   if (srcReg->argMod & GL_COMP_BIT_ATI) {
> +      struct ureg_src modsrc[2];
> +      modsrc[0] = ureg_imm1f(t->ureg, 1.0);
> +      modsrc[1] = ureg_src(arg);
> +
> +      ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2);
> +   }
> +   if (srcReg->argMod & GL_BIAS_BIT_ATI) {
> +      struct ureg_src modsrc[2];
> +      modsrc[0] = ureg_src(arg);
> +      modsrc[1] = ureg_imm1f(t->ureg, 0.5);
> +
> +      ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2);
> +   }
> +   if (srcReg->argMod & GL_2X_BIT_ATI) {
> +      struct ureg_src modsrc[2];
> +      modsrc[0] = ureg_src(arg);
> +      modsrc[1] = ureg_src(arg);
> +
> +      ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
> +   }
> +   if (srcReg->argMod & GL_NEGATE_BIT_ATI) {
> +      struct ureg_src modsrc[2];
> +      modsrc[0] = ureg_src(arg);
> +      modsrc[1] = ureg_imm1f(t->ureg, -1.0);
> +
> +      ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
> +   }
> +   return  ureg_src(arg);
> +}
> +
> +/* These instructions need special treatment */
> +static void
> +emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
> +                  struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
> +{
> +   struct ureg_dst tmp[1];
> +   struct ureg_src src[3];
> +
> +   if (!strcmp(desc->name, "CND")) {
> +      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */
> +      src[0] = ureg_imm1f(t->ureg, 0.5f);
> +      src[1] = args[2];
> +      ureg_insn(t->ureg, TGSI_OPCODE_SUB, tmp, 1, src, 2);
> +      src[0] = ureg_src(tmp[0]);
> +      src[1] = args[0];
> +      src[2] = args[1];
> +      ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
> +   } else if (!strcmp(desc->name, "CND0")) {
> +      src[0] = args[2];
> +      src[1] = args[1];
> +      src[2] = args[0];
> +      ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
> +   } else if (!strcmp(desc->name, "DOT2_ADD")) {
> +      /* note: DP2A is not implemented in most pipe drivers */
> +      tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
> +      src[0] = args[0];
> +      src[1] = args[1];
> +      ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2);
> +      src[0] = ureg_src(tmp[0]);
> +      src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z);
> +      ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2);
> +   }
> +}
> +
> +static void
> +emit_arith_inst(struct st_translate *t,
> +                const struct instruction_desc *desc,
> +                struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
> +{
> +   if (desc->TGSI_opcode == TGSI_OPCODE_NOP) {
> +      return emit_special_inst(t, desc, dst, args, argcount);
> +   }
> +
> +   ureg_insn(t->ureg, desc->TGSI_opcode, dst, 1, args, argcount);
> +}
> +
> +static void
> +emit_dstmod(struct st_translate *t,
> +            struct ureg_dst dst, GLuint dstMod)
> +{
> +   float imm;
> +   struct ureg_src src[3];
> +   GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI;
> +
> +   if (dstMod == GL_NONE) {
> +      return;
> +   }
> +
> +   switch (scale) {
> +   case GL_2X_BIT_ATI:
> +      imm = 2.0f;
> +      break;
> +   case GL_4X_BIT_ATI:
> +      imm = 4.0f;
> +      break;
> +   case GL_8X_BIT_ATI:
> +      imm = 8.0f;
> +      break;
> +   case GL_HALF_BIT_ATI:
> +      imm = 0.5f;
> +      break;
> +   case GL_QUARTER_BIT_ATI:
> +      imm = 0.25f;
> +      break;
> +   case GL_EIGHTH_BIT_ATI:
> +      imm = 0.125f;
> +      break;
> +   default:
> +      imm = 1.0f;
> +   }
> +
> +   src[0] = ureg_src(dst);
> +   src[1] = ureg_imm1f(t->ureg, imm);
> +   if (dstMod & GL_SATURATE_BIT_ATI) {
> +      dst = ureg_saturate(dst);
> +   }
> +   ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2);
> +}
> +
> +/**
> + * Compile one setup instruction to TGSI instructions.
> + */
> +static void
> +compile_setupinst(struct st_translate *t,
> +                  const unsigned r,
> +                  const struct atifs_setupinst *texinst)
> +{
> +   struct ureg_dst dst[1];
> +   struct ureg_src src[2];
> +
> +   if (!texinst->Opcode)
> +      return;
> +
> +   dst[0] = get_temp(t, r);
> +
> +   GLuint pass_tex = texinst->src;
> +
> +   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
> +      unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;
> +
> +      src[0] = t->inputs[t->inputMapping[attr]];
> +   } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
> +      unsigned reg = pass_tex - GL_REG_0_ATI;
> +
> +      /* the frontend already validated that REG is only allowed in second pass */
> +      if (t->regs_written[0][reg]) {
> +         src[0] = ureg_src(t->temps[reg]);
> +      } else {
> +         src[0] = ureg_imm1f(t->ureg, 0.0);
> +      }
> +   }
> +   src[0] = apply_swizzle(t, src[0], texinst->swizzle);
> +
> +   if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
> +      /* use the current texture target for the sample operation
> +       * note: this implementation doesn't support re-using an ATI_fs
> +       *    with different texture targets
> +       */
> +      gl_texture_index index = _mesa_get_texture_target_index(t->ctx, r);

Please use the value from the shader key here, not the context function.

> +      unsigned target = translate_texture_target(index);
> +
> +      /* by default texture and sampler indexes are the same */
> +      src[1] = t->samplers[r];
> +      ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, target,
> +            NULL, 0, src, 2);
> +   } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
> +      ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
> +   }
> +
> +   t->regs_written[t->current_pass][r] = true;
> +}
> +
> +/**
> + * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
> + */
> +static void
> +compile_instruction(struct st_translate *t,
> +                    const struct atifs_instruction *inst)
> +{
> +   unsigned optype;
> +
> +   for (optype = 0; optype < 2; optype++) { /* color, alpha */
> +      const struct instruction_desc *desc;
> +      struct ureg_dst dst[1];
> +      struct ureg_src args[3]; /* arguments for the main operation */
> +      unsigned arg;
> +      unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
> +
> +      if (!inst->Opcode[optype])
> +         continue;
> +
> +      desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI];
> +
> +      /* prepare the arguments */
> +      for (arg = 0; arg < desc->arg_count; arg++) {
> +         if (arg >= inst->ArgCount[optype]) {
> +            _mesa_warning(t->ctx, "Using 0 for missing argument %d of %s\n",
> +                  arg, desc->name);
> +            args[arg] = ureg_imm1f(t->ureg, 0.0);
> +         } else {
> +            args[arg] = prepare_argument(t, arg,
> +                  &inst->SrcReg[optype][arg]);
> +         }
> +      }
> +
> +      /* prepare dst */
> +      dst[0] = get_temp(t, dstreg);
> +
> +      if (optype) {
> +         dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W);
> +      } else {
> +         GLuint dstMask = inst->DstReg[optype].dstMask;
> +         if (dstMask == GL_NONE) {
> +            dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ);
> +         } else {
> +            dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */
> +         }
> +      }
> +
> +      /* emit the main instruction */
> +      emit_arith_inst(t, desc, dst, args, arg);
> +
> +      emit_dstmod(t, *dst, inst->DstReg[optype].dstMod);
> +
> +      t->regs_written[t->current_pass][dstreg] = true;
> +   }
> +}
> +
> +static void
> +apply_fog(struct st_translate *t)
> +{
> +   struct gl_fog_attrib *fog = &t->ctx->Fog;
> +   struct ureg_src oparams = t->constants[8];
> +   struct ureg_src fogcolor = t->constants[9];
> +   /* this is a single float in the X coordinate */
> +   struct ureg_src fogcoord = t->inputs[t->inputMapping[VARYING_SLOT_FOGC]];
> +
> +   struct ureg_dst dst[1];
> +   struct ureg_src src[3];
> +
> +   /* compute the 1 component fog factor f */
> +   if (fog->Mode == GL_LINEAR) {
> +      /* formula: f = (end - z) / (end - start)
> +       * with optimized parameters: f = MAD(fogcoord, oparams.x, oparams.y)
> +       */
> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
> +      src[0] = fogcoord;
> +      src[1] = ureg_scalar(oparams, TGSI_SWIZZLE_X);
> +      src[2] = ureg_scalar(oparams, TGSI_SWIZZLE_Y);
> +
> +      ureg_insn(t->ureg, TGSI_OPCODE_MAD, dst, 1, src, 3);
> +   } else if (fog->Mode == GL_EXP) {
> +      /* formula: f = exp(-dens * z)
> +       * with optimized parameters:
> +       *    f = MUL(fogcoord, oparams.z); f= EX2(-f)
> +       */
> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
> +      src[0] = fogcoord;
> +      src[1] = ureg_scalar(oparams, TGSI_SWIZZLE_Z);
> +      ureg_insn(t->ureg, TGSI_OPCODE_MUL, dst, 1, src, 2);
> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
> +      src[0] = ureg_negate(ureg_src(dst[0]));
> +      ureg_insn(t->ureg, TGSI_OPCODE_EX2, dst, 1, src, 1);
> +   } else if (fog->Mode == GL_EXP2) {
> +      /* formula: f = exp(-(dens * z)^2)
> +       * with optimized parameters:
> +       *    f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
> +       */
> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
> +      src[0] = fogcoord;
> +      src[1] = ureg_scalar(oparams, TGSI_SWIZZLE_W);
> +      ureg_insn(t->ureg, TGSI_OPCODE_MUL, dst, 1, src, 2);
> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
> +      src[0] = ureg_src(dst[0]);
> +      ureg_insn(t->ureg, TGSI_OPCODE_MUL, dst, 1, src, 2);
> +      dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
> +      src[0] = ureg_negate(ureg_src(dst[0]));
> +      ureg_insn(t->ureg, TGSI_OPCODE_EX2, dst, 1, src, 1);
> +   }
> +
> +   dst[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
> +   src[0] = ureg_src(dst[0]);
> +   src[1] = ureg_imm1f(t->ureg, 0.0f);
> +   src[2] = ureg_imm1f(t->ureg, 1.0f);
> +   ureg_insn(t->ureg, TGSI_OPCODE_CLAMP, dst, 1, src, 3);
> +
> +   dst[0] = t->temps[0];
> +   src[0] = ureg_swizzle(ureg_src(get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI)),
> +                         TGSI_SWIZZLE_X,
> +                         TGSI_SWIZZLE_X,
> +                         TGSI_SWIZZLE_X,
> +                         TGSI_SWIZZLE_Y);
> +   src[1] = ureg_src(t->temps[0]);
> +   src[2] = fogcolor;
> +   ureg_insn(t->ureg, TGSI_OPCODE_LRP, dst, 1, src, 3);
> +}
> +
> +static void
> +finalize_shader(struct st_translate *t, unsigned numPasses)
> +{
> +   struct ureg_dst dst[1] = { { 0 } };
> +   struct ureg_src src[1] = { { 0 } };
> +
> +   if (t->regs_written[numPasses-1][0]) {
> +      if (t->ctx->Fog.Enabled) {
> +         apply_fog(t);
> +      }
> +
> +      /* copy the result into the OUT slot */
> +      dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]];
> +      src[0] = ureg_src(t->temps[0]);
> +      ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
> +   }
> +
> +   /* signal the end of the program */
> +   ureg_insn(t->ureg, TGSI_OPCODE_END, dst, 0, src, 0);
> +}
> +
> +/**
> + * Called when a new variant is needed, we need to translate
> + * the ATI fragment shader to TGSI
> + */
> +enum pipe_error
> +st_translate_atifs_program(
> +   struct gl_context *ctx,
> +   struct ureg_program *ureg,
> +   struct ati_fragment_shader *atifs,
> +   struct gl_program *program,
> +   GLuint numInputs,
> +   const GLuint inputMapping[],
> +   const ubyte inputSemanticName[],
> +   const ubyte inputSemanticIndex[],
> +   const GLuint interpMode[],
> +   GLuint numOutputs,
> +   const GLuint outputMapping[],
> +   const ubyte outputSemanticName[],
> +   const ubyte outputSemanticIndex[])
> +{
> +   enum pipe_error ret = PIPE_OK;
> +
> +   unsigned pass, i, r;
> +
> +   struct st_translate translate, *t;
> +   t = &translate;
> +   memset(t, 0, sizeof *t);
> +
> +   t->inputMapping = inputMapping;
> +   t->outputMapping = outputMapping;
> +   t->ureg = ureg;
> +   t->ctx = ctx;
> +   t->atifs = atifs;
> +
> +   /*
> +    * Declare input attributes.
> +    */
> +   for (i = 0; i < numInputs; i++) {
> +      t->inputs[i] = ureg_DECL_fs_input(ureg,
> +            inputSemanticName[i],
> +            inputSemanticIndex[i],
> +            interpMode[i]);
> +   }
> +
> +   /*
> +    * Declare output attributes:
> +    *  we always have numOutputs=1 and it's FRAG_RESULT_COLOR
> +    */
> +   t->outputs[0] = ureg_DECL_output( ureg,
> +         TGSI_SEMANTIC_COLOR,
> +         outputSemanticIndex[0] );
> +
> +   /* Emit constants and immediates.  Mesa uses a single index space
> +    * for these, so we put all the translated regs in t->constants.
> +    */
> +   if (program->Parameters) {
> +      t->constants = calloc( program->Parameters->NumParameters,
> +            sizeof t->constants[0] );
> +      if (t->constants == NULL) {
> +         ret = PIPE_ERROR_OUT_OF_MEMORY;
> +         goto out;
> +      }
> +
> +      for (i = 0; i < program->Parameters->NumParameters; i++) {
> +         switch (program->Parameters->Parameters[i].Type) {
> +         case PROGRAM_STATE_VAR:
> +         case PROGRAM_UNIFORM:
> +            t->constants[i] = ureg_DECL_constant( ureg, i );
> +            break;
> +
> +         case PROGRAM_CONSTANT:
> +            t->constants[i] =
> +               ureg_DECL_immediate( ureg,
> +                                    (const float*)program->Parameters->ParameterValues[i],
> +                                    4 );
> +            break;
> +         default:
> +            break;
> +         }
> +      }
> +   }
> +
> +   /* texture samplers */
> +   for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
> +      if (program->SamplersUsed & (1 << i)) {
> +         t->samplers[i] = ureg_DECL_sampler( ureg, i );
> +
> +         /* fix texture targets that are not 2D * /
> +         / * note: this implementation doesn't support re-using an ATI_fs
> +          *    with different texture targets
> +          */
> +         gl_texture_index index = _mesa_get_texture_target_index(ctx, i);

Same here - please use the shader key. Also, the comment above that
looks obsolete now.

Marek


More information about the mesa-dev mailing list