[Mesa-dev] [PATCH 2/5] st/nine: Remove all usage of ureg_SUB in nine_ff

Jose Fonseca jfonseca at vmware.com
Fri Jan 6 11:43:10 UTC 2017


I think this is a good idea.

We still use them, but I'm happy to see them go.

It would be much easier for you and for us if you just implemented 
ureg_ABS() / ureg_SUB() inline helpers that call ureg_MOV() / ureg_ADD() 
internally: fewer chances of a typo somewhere, and less work necessary 
all around.

Jose


On 01/01/17 00:04, Marek Olšák wrote:
> From: Axel Davy <axel.davy at ens.fr>
>
> This is required to remove gallium SUB.
>
> Signed-off-by: Axel Davy <axel.davy at ens.fr>
> ---
>  src/gallium/state_trackers/nine/nine_ff.c | 40 +++++++++++++++----------------
>  1 file changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c
> index a0a33cd..7cbe3f7 100644
> --- a/src/gallium/state_trackers/nine/nine_ff.c
> +++ b/src/gallium/state_trackers/nine/nine_ff.c
> @@ -442,23 +442,23 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
>              ureg_MOV(ureg, oPos, vs->aVtx);
>          } else {
>              struct ureg_dst tmp = ureg_DECL_temporary(ureg);
>              /* vs->aVtx contains the coordinates buffer wise.
>              * later in the pipeline, clipping, viewport and division
>              * by w (rhw = 1/w) are going to be applied, so do the reverse
>              * of these transformations (except clipping) to have the good
>              * position at the end.*/
>              ureg_MOV(ureg, tmp, vs->aVtx);
>              /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
> -            ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
> +            ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
>              ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
> -            ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
> +            ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
>              /* Y needs to be reversed */
>              ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
>              /* inverse rhw */
>              ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
>              /* multiply X, Y, Z by w */
>              ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
>              ureg_MOV(ureg, oPos, ureg_src(tmp));
>              ureg_release_temporary(ureg, tmp);
>          }
>      } else if (key->vertexblend) {
> @@ -504,21 +504,21 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
>                  ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
>                  ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
>              }
>
>              if (i < (key->vertexblend - 1)) {
>                  /* accumulate weighted position value */
>                  ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
>                  if (has_aNrm)
>                      ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
>                  /* subtract weighted position value for last value */
> -                ureg_SUB(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_scalar(vs->aWgt, i));
> +                ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
>              }
>          }
>
>          /* the last weighted position is always 1 - sum_of_previous_weights */
>          ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
>          if (has_aNrm)
>              ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
>
>          /* multiply by VIEW_PROJ */
>          ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
> @@ -654,36 +654,36 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
>              ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
>              dim_input = 4;
>              break;
>          case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
>              tmp.WriteMask = TGSI_WRITEMASK_XYZ;
>              aVtx_normed = ureg_DECL_temporary(ureg);
>              ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
>              ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
>              ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
>              ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
> -            ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_src(tmp));
> +            ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
>              ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
>              ureg_release_temporary(ureg, aVtx_normed);
>              dim_input = 4;
>              tmp.WriteMask = TGSI_WRITEMASK_XYZW;
>              break;
>          case NINED3DTSS_TCI_SPHEREMAP:
>              /* Implement the formula of GL_SPHERE_MAP */
>              tmp.WriteMask = TGSI_WRITEMASK_XYZ;
>              aVtx_normed = ureg_DECL_temporary(ureg);
>              tmp2 = ureg_DECL_temporary(ureg);
>              ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
>              ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
>              ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
>              ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
> -            ureg_SUB(ureg, tmp, ureg_src(aVtx_normed), ureg_src(tmp));
> +            ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
>              /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
>              ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
>              ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
>              ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
>              ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
>              ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
>              /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
>               * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
>              ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
>              ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
> @@ -822,21 +822,21 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
>
>          /* if (not DIRECTIONAL light): */
>          ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
>          ureg_MOV(ureg, rHit, ureg_negate(cLDir));
>          ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
>          ureg_IF(ureg, _X(tmp), &label[l++]);
>          {
>              /* hitDir = light.position - eyeVtx
>               * d = length(hitDir)
>               */
> -            ureg_SUB(ureg, rHit, cLPos, vs->aVtx);
> +            ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
>              ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
>              ureg_RSQ(ureg, tmp_y, _X(tmp));
>              ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
>
>              /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
>              ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
>              ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
>              ureg_RCP(ureg, rAtt, _W(rAtt));
>              /* cut-off if distance exceeds Light.Range */
>              ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
> @@ -856,39 +856,39 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
>               *
>               * if (rho  > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
>               *     spotAtt = 1
>               * else
>               * if (rho <= light.cphi2)
>               *     spotAtt = 0
>               * else
>               *     spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
>               */
>              ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
> -            ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
> +            ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
>              ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
>              ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
>              ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
>              ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
>              ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
>              ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
>          }
>          ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
>          ureg_ENDIF(ureg);
>
>          /* directional factors, let's not use LIT because of clarity */
>
>          if (has_aNrm) {
>              if (key->localviewer) {
>                  ureg_normalize3(ureg, rMid, vs->aVtx);
> -                ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
> +                ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
>              } else {
> -                ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
> +                ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
>              }
>              ureg_normalize3(ureg, rMid, ureg_src(rMid));
>              ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
>              ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
>              ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));
>              /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
>               * For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
>               * No tests were made for backfacing, so add the two conditions */
>              ureg_IF(ureg, _Z(tmp), &label[l++]);
>              {
> @@ -977,21 +977,21 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
>              ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
>              ureg_EX2(ureg, tmp_x, _X(tmp));
>          } else
>          if (key->fog_mode == D3DFOG_EXP2) {
>              ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
>              ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
>              ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
>              ureg_EX2(ureg, tmp_x, _X(tmp));
>          } else
>          if (key->fog_mode == D3DFOG_LINEAR) {
> -            ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
> +            ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
>              ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
>          }
>          ureg_MOV(ureg, oFog, _X(tmp));
>          ureg_release_temporary(ureg, tmp);
>      } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
>          ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
>      }
>
>      if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
>          struct ureg_src input;
> @@ -1125,21 +1125,21 @@ ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
>      case D3DTA_TFACTOR:
>          reg = ureg_DECL_constant(ps->ureg, 20);
>          break;
>      default:
>          assert(0);
>          reg = ureg_src_undef();
>          break;
>      }
>      if (ta & D3DTA_COMPLEMENT) {
>          struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
> -        ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
> +        ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
>          reg = ureg_src(dst);
>      }
>      if (ta & D3DTA_ALPHAREPLICATE)
>          reg = _WWWW(reg);
>      return reg;
>  }
>
>  static struct ureg_dst
>  ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
>  {
> @@ -1228,75 +1228,75 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u
>          break;
>      case D3DTOP_MODULATE4X:
>          ureg_MUL(ureg, tmp, arg[1], arg[2]);
>          ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
>          break;
>      case D3DTOP_ADD:
>          ureg_ADD(ureg, dst, arg[1], arg[2]);
>          break;
>      case D3DTOP_ADDSIGNED:
>          ureg_ADD(ureg, tmp, arg[1], arg[2]);
> -        ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
> +        ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
>          break;
>      case D3DTOP_ADDSIGNED2X:
>          ureg_ADD(ureg, tmp, arg[1], arg[2]);
>          ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
>          break;
>      case D3DTOP_SUBTRACT:
> -        ureg_SUB(ureg, dst, arg[1], arg[2]);
> +        ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
>          break;
>      case D3DTOP_ADDSMOOTH:
> -        ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
> +        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
>          ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
>          break;
>      case D3DTOP_BLENDDIFFUSEALPHA:
>          ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
>          break;
>      case D3DTOP_BLENDTEXTUREALPHA:
>          /* XXX: alpha taken from previous stage, texture or result ? */
>          ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
>          break;
>      case D3DTOP_BLENDFACTORALPHA:
>          ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
>          break;
>      case D3DTOP_BLENDTEXTUREALPHAPM:
> -        ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
> +        ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
>          ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
>          break;
>      case D3DTOP_BLENDCURRENTALPHA:
>          ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
>          break;
>      case D3DTOP_PREMODULATE:
>          ureg_MOV(ureg, dst, arg[1]);
>          ps->stage.index_pre_mod = ps->stage.index + 1;
>          break;
>      case D3DTOP_MODULATEALPHA_ADDCOLOR:
>          ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
>          break;
>      case D3DTOP_MODULATECOLOR_ADDALPHA:
>          ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
>          break;
>      case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
> -        ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
> +        ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
>          ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
>          break;
>      case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
> -        ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
> +        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
>          ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
>          break;
>      case D3DTOP_BUMPENVMAP:
>          break;
>      case D3DTOP_BUMPENVMAPLUMINANCE:
>          break;
>      case D3DTOP_DOTPRODUCT3:
> -        ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
> -        ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
> +        ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
> +        ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
>          ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
>          ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
>          break;
>      case D3DTOP_MULTIPLYADD:
>          ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
>          break;
>      case D3DTOP_LERP:
>          ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
>          break;
>      case D3DTOP_DISABLE:
> @@ -1529,21 +1529,21 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
>              ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
>              ureg_EX2(ureg, rFog, _X(rFog));
>          } else
>          if (key->fog_mode == D3DFOG_EXP2) {
>              ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
>              ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
>              ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
>              ureg_EX2(ureg, rFog, _X(rFog));
>          } else
>          if (key->fog_mode == D3DFOG_LINEAR) {
> -            ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _X(rFog));
> +            ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
>              ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
>          }
>          ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
>          ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
>      } else
>      if (key->fog) {
>          struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
>          ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
>          ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
>      } else {
>



More information about the mesa-dev mailing list