[Bug 98299] Compute shaders generate stupid divides
bugzilla-daemon at freedesktop.org
bugzilla-daemon at freedesktop.org
Mon Oct 17 22:29:04 UTC 2016
https://bugs.freedesktop.org/show_bug.cgi?id=98299
Bug ID: 98299
Summary: Compute shaders generate stupid divides
Product: Mesa
Version: git
Hardware: Other
OS: All
Status: NEW
Severity: enhancement
Priority: medium
Component: Drivers/DRI/i965
Assignee: idr at freedesktop.org
Reporter: idr at freedesktop.org
QA Contact: intel-3d-bugs at lists.freedesktop.org
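While working on GL_ARB_gpu_shader_int64 support, I noticed that compute
shaders with local_size_x = 1 or local_size_y = 1 can generate dumb divides,
i.e. integer divides and modulos by a constant 1.
For example, the following shader (which only sets local_size_x, so
local_size_y and local_size_z keep their default value of 1),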
#extension GL_ARB_gpu_shader_int64 : require
writeonly uniform image2D tex;
layout(local_size_x = 9) in;
uniform uint64_t arg0;
uniform uint64_t arg1;
void main()
{
vec4 tmp_color;
if((arg0 >= arg1))
tmp_color = vec4(1.0, 1.0, 0.0, 1.0);
else
tmp_color = vec4(0.0, 0.0, 1.0, 1.0);
ivec2 coord = ivec2(gl_GlobalInvocationID.xy);
imageStore(tex, coord, tmp_color);
}
generates:
Native code for unnamed compute shader GLSL2
SIMD16 shader: 52 instructions. 0 loops. 326 cycles. 0:0 spills:fills. Promoted
0 constants. Compacted 832 to 592 bytes (29%)
START B0
mov(16) g16<1>UD g0.1<0,1,0>UD { align1 1H
compacted };
mov(16) g18<1>UD g0.6<0,1,0>UD { align1 1H };
mov(16) g2<1>UD 0x00000000UD { align1 1H
compacted };
cmp.ge.f0(16) null<1>UQ g1<0,1,0>UQ g1.1<0,1,0>UQ { align1 1H };
mov(16) g4<1>D 1065353216D { align1 1H };
mov(8) g6<1>UW 0x76543210V { align1 WE_all
1Q };
mov(8) g22<1>UD 0D { align1 WE_all
1Q };
mov(8) g28<1>F 1F { align1 1Q };
mul(16) g14<1>D g16<8,8,1>D 9D { align1 1H
compacted };
(-f0) sel(16) g16<1>UD g2<8,8,1>UD 0x3f800000UD { align1 1H };
(-f0) sel(16) g29<1>UD g4<8,8,1>UD 0x00000000UD { align1 1H };
add(8) g6.8<1>UW g6<8,8,1>UW 0x0008UW { align1 WE_all
1Q };
mov(1) g22.7<1>UD -1D { align1 WE_all
};
mov(8) g25<1>F g16<8,8,1>F { align1 1Q
compacted };
mov(8) g26<1>F g16<8,8,1>F { align1 1Q
compacted };
mov(8) g27<1>F g29<8,8,1>F { align1 1Q
compacted };
mov(16) g2<1>UD g6<8,8,1>UW { align1 1H };
add(16) g4<1>D g2<8,8,1>D g1.5<0,1,0>D { align1 1H
compacted };
math intdiv(8) g6<1>D g4<8,8,1>D 1D { align1 1Q
compacted };
math intdiv(8) g7<1>D g5<8,8,1>D 1D { align1 2Q
compacted };
math intdiv(8) g8<1>D g4<8,8,1>D 9D { align1 1Q
compacted };
math intdiv(8) g9<1>D g5<8,8,1>D 9D { align1 2Q
compacted };
math intmod(8) g10<1>D g6<8,8,1>D 9D { align1 1Q
compacted };
math intmod(8) g11<1>D g7<8,8,1>D 9D { align1 2Q
compacted };
math intmod(8) g12<1>D g8<8,8,1>D 1D { align1 1Q
compacted };
math intmod(8) g13<1>D g9<8,8,1>D 1D { align1 2Q
compacted };
mov.nz.f0(16) null<1>D g10<8,8,1>D { align1 1H };
(+f0) xor.l.f0(16) null<1>D g6<8,8,1>D 9D { align1 1H
compacted };
(+f0) add(16) g10<1>D g10<8,8,1>D 9D { align1 1H
compacted };
add(16) g31<1>D g14<8,8,1>D g10<8,8,1>D { align1 1H
compacted };
mov.nz.f0(16) null<1>D g12<8,8,1>D { align1 1H };
mov(8) g23<1>UD g31<8,8,1>UD { align1 1Q
compacted };
(+f0) xor.l.f0(16) null<1>D g8<8,8,1>D 1D { align1 1H
compacted };
(+f0) add(16) g12<1>D g12<8,8,1>D 1D { align1 1H
compacted };
add(16) g20<1>D g18<8,8,1>D g12<8,8,1>D { align1 1H
compacted };
mov(8) g24<1>UD g20<8,8,1>UD { align1 1Q
compacted };
and(1) a0<1>UD g1.4<0,1,0>UD 0x000000ffUD { align1 WE_all
compacted };
or(1) a0<1>UD a0<0,1,0>UD 0x0e0b5000UD { align1 WE_all
};
send(8) null<1>UW g22<8,8,1>UD a0<0,1,0>UD
dp data 1 indirect {
align1 1Q };
mov(8) g2<1>UD 0D { align1 WE_all
2Q };
mov(8) g3<1>UD g32<8,8,1>UD { align1 2Q
compacted };
mov(8) g4<1>UD g21<8,8,1>UD { align1 2Q
compacted };
mov(8) g5<1>F g17<8,8,1>F { align1 2Q
compacted };
mov(8) g6<1>F g17<8,8,1>F { align1 2Q
compacted };
mov(8) g7<1>F g30<8,8,1>F { align1 2Q
compacted };
mov(8) g8<1>F 1F { align1 2Q };
mov(1) g2.7<1>UD -1D { align1 WE_all
};
and(1) a0<1>UD g1.4<0,1,0>UD 0x000000ffUD { align1 WE_all
};
or(1) a0<1>UD a0<0,1,0>UD 0x0e0b6000UD { align1 WE_all
};
send(8) null<1>UW g2<8,8,1>UD a0<0,1,0>UD
dp data 1 indirect {
align1 2Q };
mov(8) g127<1>UD g0<8,8,1>UD { align1 WE_all
1Q compacted };
send(16) null<1>UW g127<8,8,1>UD
thread_spawner mlen 1 rlen 0 {
align1 WE_all 1H EOT };
END B0
I only spent about 1 minute tracking this down, and it wasn't instantly obvious
what is generating this code. I wanted to make a note of it before I forgot.
:)
--
You are receiving this mail because:
You are the QA Contact for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/intel-3d-bugs/attachments/20161017/c40727f5/attachment-0001.html>
More information about the intel-3d-bugs
mailing list