<div dir="auto"><div><br><br><div class="gmail_quote"><div dir="ltr">Den ons 19 dec. 2018 17:44 skrev Jonathan Marek &lt;<a href="mailto:jonathan@marek.ca" target="_blank" rel="noreferrer">jonathan@marek.ca</a>&gt;:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">When ffma is available, we can use a different arrangement of constants to<br>
get a better result. On freedreno/ir3, this reduces the YUV->RGB to 7<br>
scalar ffma. On freedreno/a2xx, it will allow YUV->RGB to be 3 vec4 ffma.<br>
<br>
Signed-off-by: Jonathan Marek &lt;<a href="mailto:jonathan@marek.ca" rel="noreferrer noreferrer" target="_blank">jonathan@marek.ca</a>&gt;<br>
---<br>
 src/compiler/nir/nir_lower_tex.c | 62 ++++++++++++++++++++++----------<br>
 1 file changed, 43 insertions(+), 19 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c<br>
index 6a6b6c41a7..f7c821bb34 100644<br>
--- a/src/compiler/nir/nir_lower_tex.c<br>
+++ b/src/compiler/nir/nir_lower_tex.c<br>
@@ -342,25 +342,49 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,<br>
                    nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,<br>
                    nir_ssa_def *a)<br>
 {<br>
-   nir_const_value m[3] = {<br>
-      { .f32 = { 1.0f,  0.0f,         1.59602678f, 0.0f } },<br>
-      { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },<br>
-      { .f32 = { 1.0f,  2.01723214f,  0.0f,        0.0f } }<br>
-   };<br>
-<br>
-   nir_ssa_def *yuv =<br>
-      nir_vec4(b,<br>
-               nir_fmul(b, nir_imm_float(b, 1.16438356f),<br>
-                        nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),<br>
-               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),<br>
-               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),<br>
-               nir_imm_float(b, 0.0));<br>
-<br>
-   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));<br>
-   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));<br>
-   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));<br>
-<br>
-   nir_ssa_def *result = nir_vec4(b, red, green, blue, a);<br>
+   nir_ssa_def *result;<br>
+<br>
+<br>
+   if (b->shader->options->fuse_ffma) {<br>
+      nir_const_value m[4] = {<br></blockquote></div></div><div dir="auto"><br></div><div dir="auto"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"></blockquote></div></div><div dir="auto">Drive-by comment, but shouldn't this (the m[4] declaration just above) be m[3]? Only three rows are initialized.</div><div dir="auto"><br></div><div dir="auto">Regards</div><div dir="auto">Nils</div><div dir="auto"><br></div><div dir="auto"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+         { .f32 = { 1.16438356f, 1.16438356f, 1.16438356f, 0.0f } },<br>
+         { .f32 = { 0.0f,       -0.39176229f, 2.01723214f, 0.0f } },<br>
+         { .f32 = { 1.59602678f,-0.81296764f, 0.0f,        0.0f } },<br>
+      };<br>
+      static const float y_off = -16.0f * 1.16438356f / 255.0f;<br>
+      static const float sc = 128.0f / 255.0f;<br>
+<br>
+      nir_ssa_def *offset =<br>
+         nir_vec4(b,<br>
+                  nir_imm_float(b, y_off - sc * 1.59602678f),<br>
+                  nir_imm_float(b, y_off + sc * (0.81296764f + 0.39176229f)),<br>
+                  nir_imm_float(b, y_off - sc * 2.01723214f),<br>
+                  a);<br>
+<br>
+      result = nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]),<br>
+                       nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]),<br>
+                                nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]), offset)));<br>
+   } else {<br>
+      nir_const_value m[3] = {<br>
+         { .f32 = { 1.0f,  0.0f,         1.59602678f, 0.0f } },<br>
+         { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },<br>
+         { .f32 = { 1.0f,  2.01723214f,  0.0f,        0.0f } }<br>
+      };<br>
+<br>
+      nir_ssa_def *yuv =<br>
+         nir_vec4(b,<br>
+                  nir_fmul(b, nir_imm_float(b, 1.16438356f),<br>
+                           nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),<br>
+                  nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),<br>
+                  nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),<br>
+                  nir_imm_float(b, 0.0));<br>
+<br>
+      nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));<br>
+      nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));<br>
+      nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));<br>
+<br>
+      result = nir_vec4(b, red, green, blue, a);<br>
+   }<br>
<br>
    nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));<br>
 }<br>
-- <br>
2.17.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" rel="noreferrer noreferrer" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer noreferrer noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div></div>