[Mesa-dev] [PATCH 2/5] st/nine: Remove all usage of ureg_SUB in nine_ff
Marek Olšák
maraeo at gmail.com
Sun Jan 1 00:04:30 UTC 2017
From: Axel Davy <axel.davy at ens.fr>
This is a prerequisite for removing the SUB opcode from gallium.
Signed-off-by: Axel Davy <axel.davy at ens.fr>
---
src/gallium/state_trackers/nine/nine_ff.c | 40 +++++++++++++++----------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c
index a0a33cd..7cbe3f7 100644
--- a/src/gallium/state_trackers/nine/nine_ff.c
+++ b/src/gallium/state_trackers/nine/nine_ff.c
@@ -442,23 +442,23 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MOV(ureg, oPos, vs->aVtx);
} else {
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
/* vs->aVtx contains the coordinates buffer wise.
* later in the pipeline, clipping, viewport and division
* by w (rhw = 1/w) are going to be applied, so do the reverse
* of these transformations (except clipping) to have the good
* position at the end.*/
ureg_MOV(ureg, tmp, vs->aVtx);
/* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
- ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
+ ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
- ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
+ ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
/* Y needs to be reversed */
ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
/* inverse rhw */
ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
/* multiply X, Y, Z by w */
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
ureg_MOV(ureg, oPos, ureg_src(tmp));
ureg_release_temporary(ureg, tmp);
}
} else if (key->vertexblend) {
@@ -504,21 +504,21 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
}
if (i < (key->vertexblend - 1)) {
/* accumulate weighted position value */
ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
if (has_aNrm)
ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
/* subtract weighted position value for last value */
- ureg_SUB(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_scalar(vs->aWgt, i));
+ ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
}
}
/* the last weighted position is always 1 - sum_of_previous_weights */
ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
if (has_aNrm)
ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
/* multiply by VIEW_PROJ */
ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
@@ -654,36 +654,36 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
dim_input = 4;
break;
case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
tmp.WriteMask = TGSI_WRITEMASK_XYZ;
aVtx_normed = ureg_DECL_temporary(ureg);
ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
- ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_src(tmp));
+ ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
ureg_release_temporary(ureg, aVtx_normed);
dim_input = 4;
tmp.WriteMask = TGSI_WRITEMASK_XYZW;
break;
case NINED3DTSS_TCI_SPHEREMAP:
/* Implement the formula of GL_SPHERE_MAP */
tmp.WriteMask = TGSI_WRITEMASK_XYZ;
aVtx_normed = ureg_DECL_temporary(ureg);
tmp2 = ureg_DECL_temporary(ureg);
ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
- ureg_SUB(ureg, tmp, ureg_src(aVtx_normed), ureg_src(tmp));
+ ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
/* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
/* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
* TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
@@ -822,21 +822,21 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
/* if (not DIRECTIONAL light): */
ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
ureg_MOV(ureg, rHit, ureg_negate(cLDir));
ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
ureg_IF(ureg, _X(tmp), &label[l++]);
{
/* hitDir = light.position - eyeVtx
* d = length(hitDir)
*/
- ureg_SUB(ureg, rHit, cLPos, vs->aVtx);
+ ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
ureg_RSQ(ureg, tmp_y, _X(tmp));
ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
/* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
ureg_RCP(ureg, rAtt, _W(rAtt));
/* cut-off if distance exceeds Light.Range */
ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
@@ -856,39 +856,39 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
*
* if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
* spotAtt = 1
* else
* if (rho <= light.cphi2)
* spotAtt = 0
* else
* spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
*/
ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
- ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
+ ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
}
ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
ureg_ENDIF(ureg);
/* directional factors, let's not use LIT because of clarity */
if (has_aNrm) {
if (key->localviewer) {
ureg_normalize3(ureg, rMid, vs->aVtx);
- ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
+ ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
} else {
- ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
+ ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
}
ureg_normalize3(ureg, rMid, ureg_src(rMid));
ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));
/* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
* For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
* No tests were made for backfacing, so add the two conditions */
ureg_IF(ureg, _Z(tmp), &label[l++]);
{
@@ -977,21 +977,21 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
ureg_EX2(ureg, tmp_x, _X(tmp));
} else
if (key->fog_mode == D3DFOG_EXP2) {
ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
ureg_EX2(ureg, tmp_x, _X(tmp));
} else
if (key->fog_mode == D3DFOG_LINEAR) {
- ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
+ ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
}
ureg_MOV(ureg, oFog, _X(tmp));
ureg_release_temporary(ureg, tmp);
} else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
}
if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
struct ureg_src input;
@@ -1125,21 +1125,21 @@ ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
case D3DTA_TFACTOR:
reg = ureg_DECL_constant(ps->ureg, 20);
break;
default:
assert(0);
reg = ureg_src_undef();
break;
}
if (ta & D3DTA_COMPLEMENT) {
struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
- ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
+ ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
reg = ureg_src(dst);
}
if (ta & D3DTA_ALPHAREPLICATE)
reg = _WWWW(reg);
return reg;
}
static struct ureg_dst
ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
{
@@ -1228,75 +1228,75 @@ ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct u
break;
case D3DTOP_MODULATE4X:
ureg_MUL(ureg, tmp, arg[1], arg[2]);
ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
break;
case D3DTOP_ADD:
ureg_ADD(ureg, dst, arg[1], arg[2]);
break;
case D3DTOP_ADDSIGNED:
ureg_ADD(ureg, tmp, arg[1], arg[2]);
- ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
+ ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
break;
case D3DTOP_ADDSIGNED2X:
ureg_ADD(ureg, tmp, arg[1], arg[2]);
ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
break;
case D3DTOP_SUBTRACT:
- ureg_SUB(ureg, dst, arg[1], arg[2]);
+ ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
break;
case D3DTOP_ADDSMOOTH:
- ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
+ ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
break;
case D3DTOP_BLENDDIFFUSEALPHA:
ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
break;
case D3DTOP_BLENDTEXTUREALPHA:
/* XXX: alpha taken from previous stage, texture or result ? */
ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
break;
case D3DTOP_BLENDFACTORALPHA:
ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
break;
case D3DTOP_BLENDTEXTUREALPHAPM:
- ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
+ ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
break;
case D3DTOP_BLENDCURRENTALPHA:
ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
break;
case D3DTOP_PREMODULATE:
ureg_MOV(ureg, dst, arg[1]);
ps->stage.index_pre_mod = ps->stage.index + 1;
break;
case D3DTOP_MODULATEALPHA_ADDCOLOR:
ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
break;
case D3DTOP_MODULATECOLOR_ADDALPHA:
ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
break;
case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
- ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
+ ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
break;
case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
- ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
+ ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
break;
case D3DTOP_BUMPENVMAP:
break;
case D3DTOP_BUMPENVMAPLUMINANCE:
break;
case D3DTOP_DOTPRODUCT3:
- ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
- ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
+ ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
+ ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
break;
case D3DTOP_MULTIPLYADD:
ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
break;
case D3DTOP_LERP:
ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
break;
case D3DTOP_DISABLE:
@@ -1529,21 +1529,21 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
ureg_EX2(ureg, rFog, _X(rFog));
} else
if (key->fog_mode == D3DFOG_EXP2) {
ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
ureg_EX2(ureg, rFog, _X(rFog));
} else
if (key->fog_mode == D3DFOG_LINEAR) {
- ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _X(rFog));
+ ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
}
ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
} else
if (key->fog) {
struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
} else {
--
2.7.4
More information about the mesa-dev mailing list