Mesa (main): spirv: create ffma more often

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Aug 16 18:05:35 UTC 2021


Module: Mesa
Branch: main
Commit: f6f9000f84db84370262472282065a8d17ed2b36
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f6f9000f84db84370262472282065a8d17ed2b36

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Tue Jun 16 14:28:18 2020 +0100

spirv: create ffma more often

Later passes will not be able to combine separate multiply and add
instructions into ffma if those instructions are exact, so create the
ffma instructions from the start. The ffma instructions can be lowered
later if they are unwanted.

fossil-db (GFX10.3):
Totals from 14697 (10.05% of 146267) affected shaders:
VGPRs: 645736 -> 614168 (-4.89%)
CodeSize: 59312768 -> 58735352 (-0.97%); split: -0.97%, +0.00%
MaxWaves: 372900 -> 376666 (+1.01%)
Instrs: 11339280 -> 11120882 (-1.93%); split: -1.93%, +0.00%
Latency: 284874519 -> 285277327 (+0.14%); split: -0.10%, +0.24%
InvThroughput: 68791374 -> 68526739 (-0.38%); split: -0.49%, +0.10%

fossil-db (GFX10):
Totals from 11039 (7.55% of 146267) affected shaders:
CodeSize: 54785444 -> 54785268 (-0.00%); split: -0.00%, +0.00%
Instrs: 10401349 -> 10401396 (+0.00%); split: -0.00%, +0.00%
Latency: 277781803 -> 278572890 (+0.28%); split: -0.00%, +0.29%
InvThroughput: 65035902 -> 65100855 (+0.10%); split: -0.00%, +0.10%

fossil-db (GFX9):
Totals from 24055 (16.43% of 146401) affected shaders:
SGPRs: 1790704 -> 1790640 (-0.00%)
VGPRs: 1105736 -> 1105716 (-0.00%)
CodeSize: 110944732 -> 110948812 (+0.00%); split: -0.00%, +0.01%
Instrs: 21609095 -> 21610227 (+0.01%); split: -0.00%, +0.01%
Latency: 756137596 -> 756145812 (+0.00%); split: -0.02%, +0.02%
InvThroughput: 344103825 -> 344112245 (+0.00%); split: -0.00%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8056>

---

 src/compiler/spirv/vtn_alu.c     |  4 +--
 src/compiler/spirv/vtn_glsl450.c | 54 ++++++++++++++++++----------------------
 2 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index 4298292d05f..48f41ac249a 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -122,8 +122,8 @@ matrix_multiply(struct vtn_builder *b,
                      nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
          for (int j = src0_columns - 2; j >= 0; j--) {
             dest->elems[i]->def =
-               nir_fadd(&b->nb, nir_fmul(&b->nb, src0->elems[j]->def,
-                                         nir_channel(&b->nb, src1->elems[i]->def, j)),
+               nir_ffma(&b->nb, src0->elems[j]->def,
+                                nir_channel(&b->nb, src1->elems[i]->def, j),
                                 dest->elems[i]->def);
          }
       }
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index f253b68fef0..d6ab4c03d21 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -202,19 +202,17 @@ build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1, bool piecewise)
    nir_ssa_def *half = nir_imm_floatN_t(b, 0.5f, x->bit_size);
    nir_ssa_def *abs_x = nir_fabs(b, x);
 
-   nir_ssa_def *p0_plus_xp1 = nir_fadd_imm(b, nir_fmul_imm(b, abs_x, p1), p0);
+   nir_ssa_def *p0_plus_xp1 = nir_ffma_imm12(b, abs_x, p1, p0);
 
    nir_ssa_def *expr_tail =
-      nir_fadd_imm(b, nir_fmul(b, abs_x,
-                                  nir_fadd_imm(b, nir_fmul(b, abs_x,
-                                                               p0_plus_xp1),
-                                                  M_PI_4f - 1.0f)),
-                      M_PI_2f);
+      nir_ffma_imm2(b, abs_x,
+                       nir_ffma_imm2(b, abs_x, p0_plus_xp1, M_PI_4f - 1.0f),
+                       M_PI_2f);
 
    nir_ssa_def *result0 = nir_fmul(b, nir_fsign(b, x),
-                      nir_fsub(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
-                                  nir_fmul(b, nir_fsqrt(b, nir_fsub(b, one, abs_x)),
-                                                           expr_tail)));
+                      nir_a_minus_bc(b, nir_imm_floatN_t(b, M_PI_2f, x->bit_size),
+                                        nir_fsqrt(b, nir_fsub(b, one, abs_x)),
+                                        expr_tail));
    if (piecewise) {
       /* approximation for |x| < 0.5 */
       const float pS0 =  1.6666586697e-01f;
@@ -225,15 +223,12 @@ build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1, bool piecewise)
       nir_ssa_def *x2 = nir_fmul(b, x, x);
       nir_ssa_def *p = nir_fmul(b,
                                 x2,
-                                nir_fadd_imm(b,
-                                             nir_fmul(b,
-                                                      x2,
-                                                      nir_fadd_imm(b, nir_fmul_imm(b, x2, pS2),
-                                                                   pS1)),
-                                             pS0));
-
-      nir_ssa_def *q = nir_fadd(b, nir_fmul_imm(b, x2, qS1), one);
-      nir_ssa_def *result1 = nir_fadd(b, nir_fmul(b, x, nir_fdiv(b, p, q)), x);
+                                nir_ffma_imm2(b, x2,
+                                                 nir_ffma_imm12(b, x2, pS2, pS1),
+                                                 pS0));
+
+      nir_ssa_def *q = nir_ffma_imm1(b, x2, qS1, one);
+      nir_ssa_def *result1 = nir_ffma(b, x, nir_fdiv(b, p, q), x);
       return nir_bcsel(b, nir_flt(b, abs_x, half), result1, result0);
    } else {
       return result0;
@@ -414,9 +409,10 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
    case GLSLstd450Reflect:
       /* I - 2 * dot(N, I) * N */
       dest->def =
-         nir_fsub(nb, src[0], nir_fmul(nb, NIR_IMM_FP(nb, 2.0),
-                              nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
-                                           src[1])));
+         nir_a_minus_bc(nb, src[0],
+                            src[1],
+                            nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
+                                         NIR_IMM_FP(nb, 2.0)));
       break;
 
    case GLSLstd450Refract: {
@@ -442,12 +438,12 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
       }
       /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
       nir_ssa_def *k =
-         nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
-                      nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
+         nir_a_minus_bc(nb, one, eta,
+                            nir_fmul(nb, eta, nir_a_minus_bc(nb, one, n_dot_i, n_dot_i)));
       nir_ssa_def *result =
-         nir_fsub(nb, nir_fmul(nb, eta, I),
-                      nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
-                                                nir_fsqrt(nb, k)), N));
+         nir_a_minus_bc(nb, nir_fmul(nb, eta, I),
+                            nir_ffma(nb, eta, n_dot_i, nir_fsqrt(nb, k)),
+                            N);
       /* XXX: bcsel, or if statement? */
       dest->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
       break;
@@ -494,13 +490,11 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
    case GLSLstd450Asinh:
       dest->def = nir_fmul(nb, nir_fsign(nb, src[0]),
          nir_flog(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
-                      nir_fsqrt(nb, nir_fadd_imm(nb, nir_fmul(nb, src[0], src[0]),
-                                                    1.0f)))));
+                      nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], 1.0f)))));
       break;
    case GLSLstd450Acosh:
       dest->def = nir_flog(nb, nir_fadd(nb, src[0],
-         nir_fsqrt(nb, nir_fadd_imm(nb, nir_fmul(nb, src[0], src[0]),
-                                        -1.0f))));
+         nir_fsqrt(nb, nir_ffma_imm2(nb, src[0], src[0], -1.0f))));
       break;
    case GLSLstd450Atanh: {
       nir_ssa_def *one = nir_imm_floatN_t(nb, 1.0, src[0]->bit_size);



More information about the mesa-commit mailing list