[Bug 98299] Compute shaders generate stupid divides

bugzilla-daemon at freedesktop.org bugzilla-daemon at freedesktop.org
Mon Oct 17 22:29:04 UTC 2016


https://bugs.freedesktop.org/show_bug.cgi?id=98299

            Bug ID: 98299
           Summary: Compute shaders generate stupid divides
           Product: Mesa
           Version: git
          Hardware: Other
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: medium
         Component: Drivers/DRI/i965
          Assignee: idr at freedesktop.org
          Reporter: idr at freedesktop.org
        QA Contact: intel-3d-bugs at lists.freedesktop.org

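While working on GL_ARB_gpu_shader_int64 support, I noticed that compute
shaders with local_size_x = 1 or local_size_y = 1 can generate dumb divides
(integer division and modulo by 1).  For example, the following shader, which
leaves local_size_y at its default of 1,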

#extension GL_ARB_gpu_shader_int64 : require
writeonly uniform image2D tex;
layout(local_size_x = 9) in;
uniform uint64_t arg0;
uniform uint64_t arg1;

void main()
{
  vec4 tmp_color;
  if((arg0 >= arg1))
    tmp_color = vec4(1.0, 1.0, 0.0, 1.0);
  else
    tmp_color = vec4(0.0, 0.0, 1.0, 1.0);
  ivec2 coord = ivec2(gl_GlobalInvocationID.xy);
  imageStore(tex, coord, tmp_color);
}

generates:

Native code for unnamed compute shader GLSL2
SIMD16 shader: 52 instructions. 0 loops. 326 cycles. 0:0 spills:fills. Promoted
0 constants. Compacted 832 to 592 bytes (29%)
   START B0
mov(16)         g16<1>UD        g0.1<0,1,0>UD                   { align1 1H
compacted };
mov(16)         g18<1>UD        g0.6<0,1,0>UD                   { align1 1H };
mov(16)         g2<1>UD         0x00000000UD                    { align1 1H
compacted };
cmp.ge.f0(16)   null<1>UQ       g1<0,1,0>UQ     g1.1<0,1,0>UQ   { align1 1H };
mov(16)         g4<1>D          1065353216D                     { align1 1H };
mov(8)          g6<1>UW         0x76543210V                     { align1 WE_all
1Q };
mov(8)          g22<1>UD        0D                              { align1 WE_all
1Q };
mov(8)          g28<1>F         1F                              { align1 1Q };
mul(16)         g14<1>D         g16<8,8,1>D     9D              { align1 1H
compacted };
(-f0) sel(16)   g16<1>UD        g2<8,8,1>UD     0x3f800000UD    { align1 1H };
(-f0) sel(16)   g29<1>UD        g4<8,8,1>UD     0x00000000UD    { align1 1H };
add(8)          g6.8<1>UW       g6<8,8,1>UW     0x0008UW        { align1 WE_all
1Q };
mov(1)          g22.7<1>UD      -1D                             { align1 WE_all
};
mov(8)          g25<1>F         g16<8,8,1>F                     { align1 1Q
compacted };
mov(8)          g26<1>F         g16<8,8,1>F                     { align1 1Q
compacted };
mov(8)          g27<1>F         g29<8,8,1>F                     { align1 1Q
compacted };
mov(16)         g2<1>UD         g6<8,8,1>UW                     { align1 1H };
add(16)         g4<1>D          g2<8,8,1>D      g1.5<0,1,0>D    { align1 1H
compacted };
math intdiv(8)  g6<1>D          g4<8,8,1>D      1D              { align1 1Q
compacted };
math intdiv(8)  g7<1>D          g5<8,8,1>D      1D              { align1 2Q
compacted };
math intdiv(8)  g8<1>D          g4<8,8,1>D      9D              { align1 1Q
compacted };
math intdiv(8)  g9<1>D          g5<8,8,1>D      9D              { align1 2Q
compacted };
math intmod(8)  g10<1>D         g6<8,8,1>D      9D              { align1 1Q
compacted };
math intmod(8)  g11<1>D         g7<8,8,1>D      9D              { align1 2Q
compacted };
math intmod(8)  g12<1>D         g8<8,8,1>D      1D              { align1 1Q
compacted };
math intmod(8)  g13<1>D         g9<8,8,1>D      1D              { align1 2Q
compacted };
mov.nz.f0(16)   null<1>D        g10<8,8,1>D                     { align1 1H };
(+f0) xor.l.f0(16) null<1>D     g6<8,8,1>D      9D              { align1 1H
compacted };
(+f0) add(16)   g10<1>D         g10<8,8,1>D     9D              { align1 1H
compacted };
add(16)         g31<1>D         g14<8,8,1>D     g10<8,8,1>D     { align1 1H
compacted };
mov.nz.f0(16)   null<1>D        g12<8,8,1>D                     { align1 1H };
mov(8)          g23<1>UD        g31<8,8,1>UD                    { align1 1Q
compacted };
(+f0) xor.l.f0(16) null<1>D     g8<8,8,1>D      1D              { align1 1H
compacted };
(+f0) add(16)   g12<1>D         g12<8,8,1>D     1D              { align1 1H
compacted };
add(16)         g20<1>D         g18<8,8,1>D     g12<8,8,1>D     { align1 1H
compacted };
mov(8)          g24<1>UD        g20<8,8,1>UD                    { align1 1Q
compacted };
and(1)          a0<1>UD         g1.4<0,1,0>UD   0x000000ffUD    { align1 WE_all
compacted };
or(1)           a0<1>UD         a0<0,1,0>UD     0x0e0b5000UD    { align1 WE_all
};
send(8)         null<1>UW       g22<8,8,1>UD    a0<0,1,0>UD
                            dp data 1 indirect                              {
align1 1Q };
mov(8)          g2<1>UD         0D                              { align1 WE_all
2Q };
mov(8)          g3<1>UD         g32<8,8,1>UD                    { align1 2Q
compacted };
mov(8)          g4<1>UD         g21<8,8,1>UD                    { align1 2Q
compacted };
mov(8)          g5<1>F          g17<8,8,1>F                     { align1 2Q
compacted };
mov(8)          g6<1>F          g17<8,8,1>F                     { align1 2Q
compacted };
mov(8)          g7<1>F          g30<8,8,1>F                     { align1 2Q
compacted };
mov(8)          g8<1>F          1F                              { align1 2Q };
mov(1)          g2.7<1>UD       -1D                             { align1 WE_all
};
and(1)          a0<1>UD         g1.4<0,1,0>UD   0x000000ffUD    { align1 WE_all
};
or(1)           a0<1>UD         a0<0,1,0>UD     0x0e0b6000UD    { align1 WE_all
};
send(8)         null<1>UW       g2<8,8,1>UD     a0<0,1,0>UD
                            dp data 1 indirect                              {
align1 2Q };
mov(8)          g127<1>UD       g0<8,8,1>UD                     { align1 WE_all
1Q compacted };
send(16)        null<1>UW       g127<8,8,1>UD
                            thread_spawner mlen 1 rlen 0                    {
align1 WE_all 1H EOT };
   END B0


I only spent about 1 minute tracking this down, and it wasn't instantly obvious
what was generating this code.  I wanted to make a note of it before I forgot.
:)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/intel-3d-bugs/attachments/20161017/c40727f5/attachment-0001.html>


More information about the intel-3d-bugs mailing list