<div dir="ltr">On 27 March 2013 09:30, Ian Romanick <span dir="ltr">&lt;<a href="mailto:idr@freedesktop.org" target="_blank">idr@freedesktop.org</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
From: Ian Romanick &lt;<a href="mailto:ian.d.romanick@intel.com" target="_blank">ian.d.romanick@intel.com</a>&gt;<br>
<br>
Search and replace:<br>
<br>
][0] -> ].x<br>
][1] -> ].y<br>
][2] -> ].z<br>
][3] -> ].w<br>
<br>
Fixes piglit tests inverse-mat[234].{vert,frag}. These tests call the<br>
inverse function with constant parameters and expect proper constant<br>
folding to happen. My suspicion is that this patch papers over some bug<br>
in constant propagation involving array accesses.<br>
<br>
Either way, all of these accesses eventually get lowered to swizzles.<br>
This cuts out the middle man (saving a trivial amount of CPU).<br>
<br>
NOTE: This is a candidate for the 9.1 branch.<br></blockquote><div><br></div><div>I've tracked down the constant propagation bug that this patch papers over, and I just sent out a bug fix ("glsl: Fix array indexing when constant folding built-in functions.")<br>
<br>Assuming my bug fix is correct, can we NAK this patch? I think it makes the source code harder to read, and as you point out the benefit is just to save a trivial amount of CPU.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
Signed-off-by: Ian Romanick &lt;<a href="mailto:ian.d.romanick@intel.com" target="_blank">ian.d.romanick@intel.com</a>&gt;<br>
Cc: Eric Anholt &lt;<a href="mailto:eric@anholt.net" target="_blank">eric@anholt.net</a>&gt;<br>
Cc: Paul Berry &lt;<a href="mailto:stereotype441@gmail.com" target="_blank">stereotype441@gmail.com</a>&gt;<br>
---<br>
src/glsl/builtins/glsl/determinant.glsl | 62 +++++++++---------<br>
src/glsl/builtins/glsl/inverse.glsl | 112 ++++++++++++++++----------------<br>
2 files changed, 87 insertions(+), 87 deletions(-)<br>
<br>
diff --git a/src/glsl/builtins/glsl/determinant.glsl b/src/glsl/builtins/glsl/determinant.glsl<br>
index 32695a8..78751a6 100644<br>
--- a/src/glsl/builtins/glsl/determinant.glsl<br>
+++ b/src/glsl/builtins/glsl/determinant.glsl<br>
@@ -24,47 +24,47 @@<br>
#version 120<br>
float determinant(mat2 m)<br>
{<br>
- return m[0][0] * m[1][1] - m[1][0] * m[0][1];<br>
+ return m[0].x * m[1].y - m[1].x * m[0].y;<br>
}<br>
<br>
float determinant(mat3 m)<br>
{<br>
- return (+ m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1])<br>
- - m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0])<br>
- + m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0]));<br>
+ return (+ m[0].x * (m[1].y * m[2].z - m[1].z * m[2].y)<br>
+ - m[0].y * (m[1].x * m[2].z - m[1].z * m[2].x)<br>
+ + m[0].z * (m[1].x * m[2].y - m[1].y * m[2].x));<br>
}<br>
<br>
float determinant(mat4 m)<br>
{<br>
- float SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];<br>
- float SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];<br>
- float SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];<br>
- float SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];<br>
- float SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];<br>
- float SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];<br>
- float SubFactor06 = m[1][2] * m[3][3] - m[3][2] * m[1][3];<br>
- float SubFactor07 = m[1][1] * m[3][3] - m[3][1] * m[1][3];<br>
- float SubFactor08 = m[1][1] * m[3][2] - m[3][1] * m[1][2];<br>
- float SubFactor09 = m[1][0] * m[3][3] - m[3][0] * m[1][3];<br>
- float SubFactor10 = m[1][0] * m[3][2] - m[3][0] * m[1][2];<br>
- float SubFactor11 = m[1][1] * m[3][3] - m[3][1] * m[1][3];<br>
- float SubFactor12 = m[1][0] * m[3][1] - m[3][0] * m[1][1];<br>
- float SubFactor13 = m[1][2] * m[2][3] - m[2][2] * m[1][3];<br>
- float SubFactor14 = m[1][1] * m[2][3] - m[2][1] * m[1][3];<br>
- float SubFactor15 = m[1][1] * m[2][2] - m[2][1] * m[1][2];<br>
- float SubFactor16 = m[1][0] * m[2][3] - m[2][0] * m[1][3];<br>
- float SubFactor17 = m[1][0] * m[2][2] - m[2][0] * m[1][2];<br>
- float SubFactor18 = m[1][0] * m[2][1] - m[2][0] * m[1][1];<br>
+ float SubFactor00 = m[2].z * m[3].w - m[3].z * m[2].w;<br>
+ float SubFactor01 = m[2].y * m[3].w - m[3].y * m[2].w;<br>
+ float SubFactor02 = m[2].y * m[3].z - m[3].y * m[2].z;<br>
+ float SubFactor03 = m[2].x * m[3].w - m[3].x * m[2].w;<br>
+ float SubFactor04 = m[2].x * m[3].z - m[3].x * m[2].z;<br>
+ float SubFactor05 = m[2].x * m[3].y - m[3].x * m[2].y;<br>
+ float SubFactor06 = m[1].z * m[3].w - m[3].z * m[1].w;<br>
+ float SubFactor07 = m[1].y * m[3].w - m[3].y * m[1].w;<br>
+ float SubFactor08 = m[1].y * m[3].z - m[3].y * m[1].z;<br>
+ float SubFactor09 = m[1].x * m[3].w - m[3].x * m[1].w;<br>
+ float SubFactor10 = m[1].x * m[3].z - m[3].x * m[1].z;<br>
+ float SubFactor11 = m[1].y * m[3].w - m[3].y * m[1].w;<br>
+ float SubFactor12 = m[1].x * m[3].y - m[3].x * m[1].y;<br>
+ float SubFactor13 = m[1].z * m[2].w - m[2].z * m[1].w;<br>
+ float SubFactor14 = m[1].y * m[2].w - m[2].y * m[1].w;<br>
+ float SubFactor15 = m[1].y * m[2].z - m[2].y * m[1].z;<br>
+ float SubFactor16 = m[1].x * m[2].w - m[2].x * m[1].w;<br>
+ float SubFactor17 = m[1].x * m[2].z - m[2].x * m[1].z;<br>
+ float SubFactor18 = m[1].x * m[2].y - m[2].x * m[1].y;<br>
<br>
vec4 adj_0;<br>
<br>
- adj_0[0] = + (m[1][1] * SubFactor00 - m[1][2] * SubFactor01 + m[1][3] * SubFactor02);<br>
- adj_0[1] = - (m[1][0] * SubFactor00 - m[1][2] * SubFactor03 + m[1][3] * SubFactor04);<br>
- adj_0[2] = + (m[1][0] * SubFactor01 - m[1][1] * SubFactor03 + m[1][3] * SubFactor05);<br>
- adj_0[3] = - (m[1][0] * SubFactor02 - m[1][1] * SubFactor04 + m[1][2] * SubFactor05);<br>
+ adj_0.x = + (m[1].y * SubFactor00 - m[1].z * SubFactor01 + m[1].w * SubFactor02);<br>
+ adj_0.y = - (m[1].x * SubFactor00 - m[1].z * SubFactor03 + m[1].w * SubFactor04);<br>
+ adj_0.z = + (m[1].x * SubFactor01 - m[1].y * SubFactor03 + m[1].w * SubFactor05);<br>
+ adj_0.w = - (m[1].x * SubFactor02 - m[1].y * SubFactor04 + m[1].z * SubFactor05);<br>
<br>
- return (+ m[0][0] * adj_0[0]<br>
- + m[0][1] * adj_0[1]<br>
- + m[0][2] * adj_0[2]<br>
- + m[0][3] * adj_0[3]);<br>
+ return (+ m[0].x * adj_0.x<br>
+ + m[0].y * adj_0.y<br>
+ + m[0].z * adj_0.z<br>
+ + m[0].w * adj_0.w);<br>
}<br>
diff --git a/src/glsl/builtins/glsl/inverse.glsl b/src/glsl/builtins/glsl/inverse.glsl<br>
index ffb84f9..3578d98 100644<br>
--- a/src/glsl/builtins/glsl/inverse.glsl<br>
+++ b/src/glsl/builtins/glsl/inverse.glsl<br>
@@ -24,82 +24,82 @@<br>
mat2 inverse(mat2 m)<br>
{<br>
mat2 adj;<br>
- adj[0][0] = m[1][1];<br>
- adj[0][1] = -m[0][1];<br>
- adj[1][0] = -m[1][0];<br>
- adj[1][1] = m[0][0];<br>
- float det = m[0][0] * m[1][1] - m[1][0] * m[0][1];<br>
+ adj[0].x = m[1].y;<br>
+ adj[0].y = -m[0].y;<br>
+ adj[1].x = -m[1].x;<br>
+ adj[1].y = m[0].x;<br>
+ float det = m[0].x * m[1].y - m[1].x * m[0].y;<br>
return adj / det;<br>
}<br>
<br>
mat3 inverse(mat3 m)<br>
{<br>
mat3 adj;<br>
- adj[0][0] = + (m[1][1] * m[2][2] - m[2][1] * m[1][2]);<br>
- adj[1][0] = - (m[1][0] * m[2][2] - m[2][0] * m[1][2]);<br>
- adj[2][0] = + (m[1][0] * m[2][1] - m[2][0] * m[1][1]);<br>
- adj[0][1] = - (m[0][1] * m[2][2] - m[2][1] * m[0][2]);<br>
- adj[1][1] = + (m[0][0] * m[2][2] - m[2][0] * m[0][2]);<br>
- adj[2][1] = - (m[0][0] * m[2][1] - m[2][0] * m[0][1]);<br>
- adj[0][2] = + (m[0][1] * m[1][2] - m[1][1] * m[0][2]);<br>
- adj[1][2] = - (m[0][0] * m[1][2] - m[1][0] * m[0][2]);<br>
- adj[2][2] = + (m[0][0] * m[1][1] - m[1][0] * m[0][1]);<br>
+ adj[0].x = + (m[1].y * m[2].z - m[2].y * m[1].z);<br>
+ adj[1].x = - (m[1].x * m[2].z - m[2].x * m[1].z);<br>
+ adj[2].x = + (m[1].x * m[2].y - m[2].x * m[1].y);<br>
+ adj[0].y = - (m[0].y * m[2].z - m[2].y * m[0].z);<br>
+ adj[1].y = + (m[0].x * m[2].z - m[2].x * m[0].z);<br>
+ adj[2].y = - (m[0].x * m[2].y - m[2].x * m[0].y);<br>
+ adj[0].z = + (m[0].y * m[1].z - m[1].y * m[0].z);<br>
+ adj[1].z = - (m[0].x * m[1].z - m[1].x * m[0].z);<br>
+ adj[2].z = + (m[0].x * m[1].y - m[1].x * m[0].y);<br>
<br>
- float det = (+ m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1])<br>
- - m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0])<br>
- + m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0]));<br>
+ float det = (+ m[0].x * (m[1].y * m[2].z - m[1].z * m[2].y)<br>
+ - m[0].y * (m[1].x * m[2].z - m[1].z * m[2].x)<br>
+ + m[0].z * (m[1].x * m[2].y - m[1].y * m[2].x));<br>
<br>
return adj / det;<br>
}<br>
<br>
mat4 inverse(mat4 m)<br>
{<br>
- float SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];<br>
- float SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];<br>
- float SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];<br>
- float SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];<br>
- float SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];<br>
- float SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];<br>
- float SubFactor06 = m[1][2] * m[3][3] - m[3][2] * m[1][3];<br>
- float SubFactor07 = m[1][1] * m[3][3] - m[3][1] * m[1][3];<br>
- float SubFactor08 = m[1][1] * m[3][2] - m[3][1] * m[1][2];<br>
- float SubFactor09 = m[1][0] * m[3][3] - m[3][0] * m[1][3];<br>
- float SubFactor10 = m[1][0] * m[3][2] - m[3][0] * m[1][2];<br>
- float SubFactor11 = m[1][1] * m[3][3] - m[3][1] * m[1][3];<br>
- float SubFactor12 = m[1][0] * m[3][1] - m[3][0] * m[1][1];<br>
- float SubFactor13 = m[1][2] * m[2][3] - m[2][2] * m[1][3];<br>
- float SubFactor14 = m[1][1] * m[2][3] - m[2][1] * m[1][3];<br>
- float SubFactor15 = m[1][1] * m[2][2] - m[2][1] * m[1][2];<br>
- float SubFactor16 = m[1][0] * m[2][3] - m[2][0] * m[1][3];<br>
- float SubFactor17 = m[1][0] * m[2][2] - m[2][0] * m[1][2];<br>
- float SubFactor18 = m[1][0] * m[2][1] - m[2][0] * m[1][1];<br>
+ float SubFactor00 = m[2].z * m[3].w - m[3].z * m[2].w;<br>
+ float SubFactor01 = m[2].y * m[3].w - m[3].y * m[2].w;<br>
+ float SubFactor02 = m[2].y * m[3].z - m[3].y * m[2].z;<br>
+ float SubFactor03 = m[2].x * m[3].w - m[3].x * m[2].w;<br>
+ float SubFactor04 = m[2].x * m[3].z - m[3].x * m[2].z;<br>
+ float SubFactor05 = m[2].x * m[3].y - m[3].x * m[2].y;<br>
+ float SubFactor06 = m[1].z * m[3].w - m[3].z * m[1].w;<br>
+ float SubFactor07 = m[1].y * m[3].w - m[3].y * m[1].w;<br>
+ float SubFactor08 = m[1].y * m[3].z - m[3].y * m[1].z;<br>
+ float SubFactor09 = m[1].x * m[3].w - m[3].x * m[1].w;<br>
+ float SubFactor10 = m[1].x * m[3].z - m[3].x * m[1].z;<br>
+ float SubFactor11 = m[1].y * m[3].w - m[3].y * m[1].w;<br>
+ float SubFactor12 = m[1].x * m[3].y - m[3].x * m[1].y;<br>
+ float SubFactor13 = m[1].z * m[2].w - m[2].z * m[1].w;<br>
+ float SubFactor14 = m[1].y * m[2].w - m[2].y * m[1].w;<br>
+ float SubFactor15 = m[1].y * m[2].z - m[2].y * m[1].z;<br>
+ float SubFactor16 = m[1].x * m[2].w - m[2].x * m[1].w;<br>
+ float SubFactor17 = m[1].x * m[2].z - m[2].x * m[1].z;<br>
+ float SubFactor18 = m[1].x * m[2].y - m[2].x * m[1].y;<br>
<br>
mat4 adj;<br>
<br>
- adj[0][0] = + (m[1][1] * SubFactor00 - m[1][2] * SubFactor01 + m[1][3] * SubFactor02);<br>
- adj[1][0] = - (m[1][0] * SubFactor00 - m[1][2] * SubFactor03 + m[1][3] * SubFactor04);<br>
- adj[2][0] = + (m[1][0] * SubFactor01 - m[1][1] * SubFactor03 + m[1][3] * SubFactor05);<br>
- adj[3][0] = - (m[1][0] * SubFactor02 - m[1][1] * SubFactor04 + m[1][2] * SubFactor05);<br>
+ adj[0].x = + (m[1].y * SubFactor00 - m[1].z * SubFactor01 + m[1].w * SubFactor02);<br>
+ adj[1].x = - (m[1].x * SubFactor00 - m[1].z * SubFactor03 + m[1].w * SubFactor04);<br>
+ adj[2].x = + (m[1].x * SubFactor01 - m[1].y * SubFactor03 + m[1].w * SubFactor05);<br>
+ adj[3].x = - (m[1].x * SubFactor02 - m[1].y * SubFactor04 + m[1].z * SubFactor05);<br>
<br>
- adj[0][1] = - (m[0][1] * SubFactor00 - m[0][2] * SubFactor01 + m[0][3] * SubFactor02);<br>
- adj[1][1] = + (m[0][0] * SubFactor00 - m[0][2] * SubFactor03 + m[0][3] * SubFactor04);<br>
- adj[2][1] = - (m[0][0] * SubFactor01 - m[0][1] * SubFactor03 + m[0][3] * SubFactor05);<br>
- adj[3][1] = + (m[0][0] * SubFactor02 - m[0][1] * SubFactor04 + m[0][2] * SubFactor05);<br>
+ adj[0].y = - (m[0].y * SubFactor00 - m[0].z * SubFactor01 + m[0].w * SubFactor02);<br>
+ adj[1].y = + (m[0].x * SubFactor00 - m[0].z * SubFactor03 + m[0].w * SubFactor04);<br>
+ adj[2].y = - (m[0].x * SubFactor01 - m[0].y * SubFactor03 + m[0].w * SubFactor05);<br>
+ adj[3].y = + (m[0].x * SubFactor02 - m[0].y * SubFactor04 + m[0].z * SubFactor05);<br>
<br>
- adj[0][2] = + (m[0][1] * SubFactor06 - m[0][2] * SubFactor07 + m[0][3] * SubFactor08);<br>
- adj[1][2] = - (m[0][0] * SubFactor06 - m[0][2] * SubFactor09 + m[0][3] * SubFactor10);<br>
- adj[2][2] = + (m[0][0] * SubFactor11 - m[0][1] * SubFactor09 + m[0][3] * SubFactor12);<br>
- adj[3][2] = - (m[0][0] * SubFactor08 - m[0][1] * SubFactor10 + m[0][2] * SubFactor12);<br>
+ adj[0].z = + (m[0].y * SubFactor06 - m[0].z * SubFactor07 + m[0].w * SubFactor08);<br>
+ adj[1].z = - (m[0].x * SubFactor06 - m[0].z * SubFactor09 + m[0].w * SubFactor10);<br>
+ adj[2].z = + (m[0].x * SubFactor11 - m[0].y * SubFactor09 + m[0].w * SubFactor12);<br>
+ adj[3].z = - (m[0].x * SubFactor08 - m[0].y * SubFactor10 + m[0].z * SubFactor12);<br>
<br>
- adj[0][3] = - (m[0][1] * SubFactor13 - m[0][2] * SubFactor14 + m[0][3] * SubFactor15);<br>
- adj[1][3] = + (m[0][0] * SubFactor13 - m[0][2] * SubFactor16 + m[0][3] * SubFactor17);<br>
- adj[2][3] = - (m[0][0] * SubFactor14 - m[0][1] * SubFactor16 + m[0][3] * SubFactor18);<br>
- adj[3][3] = + (m[0][0] * SubFactor15 - m[0][1] * SubFactor17 + m[0][2] * SubFactor18);<br>
+ adj[0].w = - (m[0].y * SubFactor13 - m[0].z * SubFactor14 + m[0].w * SubFactor15);<br>
+ adj[1].w = + (m[0].x * SubFactor13 - m[0].z * SubFactor16 + m[0].w * SubFactor17);<br>
+ adj[2].w = - (m[0].x * SubFactor14 - m[0].y * SubFactor16 + m[0].w * SubFactor18);<br>
+ adj[3].w = + (m[0].x * SubFactor15 - m[0].y * SubFactor17 + m[0].z * SubFactor18);<br>
<br>
- float det = (+ m[0][0] * adj[0][0]<br>
- + m[0][1] * adj[1][0]<br>
- + m[0][2] * adj[2][0]<br>
- + m[0][3] * adj[3][0]);<br>
+ float det = (+ m[0].x * adj[0].x<br>
+ + m[0].y * adj[1].x<br>
+ + m[0].z * adj[2].x<br>
+ + m[0].w * adj[3].x);<br>
<br>
return adj / det;<br>
}<br>
<span><font color="#888888">--<br>
1.8.1.4<br>
<br>
</font></span></blockquote></div><br></div></div>