<!doctype html>
<html lang="en">
    <head>
      <!-- Document metadata. The charset is declared first so it is seen
           before any text; the title repeats the bug ID and summary shown
           in the table below. -->
      <meta charset="utf-8">
      <title>Bug 98299 - Compute shaders generate stupid divides</title>
      <!-- Base URL against which any relative links in this mail resolve. -->
      <base href="https://bugs.freedesktop.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Compute shaders generate stupid divides"
   href="https://bugs.freedesktop.org/show_bug.cgi?id=98299">98299</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Compute shaders generate stupid divides
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>Mesa
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>git
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>Other
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>medium
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Drivers/DRI/i965
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>idr@freedesktop.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>idr@freedesktop.org
          </td>
        </tr>

        <tr>
          <th>QA Contact</th>
          <td>intel-3d-bugs@lists.freedesktop.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>While working on GL_ARB_gpu_shader_int64 support, I noticed that compute
shaders with local_size_x = 1 or local_size_y = 1 can generate dumb divides. 
For example,

#extension GL_ARB_gpu_shader_int64 : require
writeonly uniform image2D tex;
layout(local_size_x = 9) in;
uniform uint64_t arg0;
uniform uint64_t arg1;

void main()
{
  vec4 tmp_color;
  if((arg0 >= arg1))
    tmp_color = vec4(1.0, 1.0, 0.0, 1.0);
  else
    tmp_color = vec4(0.0, 0.0, 1.0, 1.0);
  ivec2 coord = ivec2(gl_GlobalInvocationID.xy);
  imageStore(tex, coord, tmp_color);
}

generates:

Native code for unnamed compute shader GLSL2
SIMD16 shader: 52 instructions. 0 loops. 326 cycles. 0:0 spills:fills. Promoted
0 constants. Compacted 832 to 592 bytes (29%)
   START B0
mov(16)         g16<1>UD        g0.1<0,1,0>UD                   { align1 1H
compacted };
mov(16)         g18<1>UD        g0.6<0,1,0>UD                   { align1 1H };
mov(16)         g2<1>UD         0x00000000UD                    { align1 1H
compacted };
cmp.ge.f0(16)   null<1>UQ       g1<0,1,0>UQ     g1.1<0,1,0>UQ   { align1 1H };
mov(16)         g4<1>D          1065353216D                     { align1 1H };
mov(8)          g6<1>UW         0x76543210V                     { align1 WE_all
1Q };
mov(8)          g22<1>UD        0D                              { align1 WE_all
1Q };
mov(8)          g28<1>F         1F                              { align1 1Q };
mul(16)         g14<1>D         g16<8,8,1>D     9D              { align1 1H
compacted };
(-f0) sel(16)   g16<1>UD        g2<8,8,1>UD     0x3f800000UD    { align1 1H };
(-f0) sel(16)   g29<1>UD        g4<8,8,1>UD     0x00000000UD    { align1 1H };
add(8)          g6.8<1>UW       g6<8,8,1>UW     0x0008UW        { align1 WE_all
1Q };
mov(1)          g22.7<1>UD      -1D                             { align1 WE_all
};
mov(8)          g25<1>F         g16<8,8,1>F                     { align1 1Q
compacted };
mov(8)          g26<1>F         g16<8,8,1>F                     { align1 1Q
compacted };
mov(8)          g27<1>F         g29<8,8,1>F                     { align1 1Q
compacted };
mov(16)         g2<1>UD         g6<8,8,1>UW                     { align1 1H };
add(16)         g4<1>D          g2<8,8,1>D      g1.5<0,1,0>D    { align1 1H
compacted };
math intdiv(8)  g6<1>D          g4<8,8,1>D      1D              { align1 1Q
compacted };
math intdiv(8)  g7<1>D          g5<8,8,1>D      1D              { align1 2Q
compacted };
math intdiv(8)  g8<1>D          g4<8,8,1>D      9D              { align1 1Q
compacted };
math intdiv(8)  g9<1>D          g5<8,8,1>D      9D              { align1 2Q
compacted };
math intmod(8)  g10<1>D         g6<8,8,1>D      9D              { align1 1Q
compacted };
math intmod(8)  g11<1>D         g7<8,8,1>D      9D              { align1 2Q
compacted };
math intmod(8)  g12<1>D         g8<8,8,1>D      1D              { align1 1Q
compacted };
math intmod(8)  g13<1>D         g9<8,8,1>D      1D              { align1 2Q
compacted };
mov.nz.f0(16)   null<1>D        g10<8,8,1>D                     { align1 1H };
(+f0) xor.l.f0(16) null<1>D     g6<8,8,1>D      9D              { align1 1H
compacted };
(+f0) add(16)   g10<1>D         g10<8,8,1>D     9D              { align1 1H
compacted };
add(16)         g31<1>D         g14<8,8,1>D     g10<8,8,1>D     { align1 1H
compacted };
mov.nz.f0(16)   null<1>D        g12<8,8,1>D                     { align1 1H };
mov(8)          g23<1>UD        g31<8,8,1>UD                    { align1 1Q
compacted };
(+f0) xor.l.f0(16) null<1>D     g8<8,8,1>D      1D              { align1 1H
compacted };
(+f0) add(16)   g12<1>D         g12<8,8,1>D     1D              { align1 1H
compacted };
add(16)         g20<1>D         g18<8,8,1>D     g12<8,8,1>D     { align1 1H
compacted };
mov(8)          g24<1>UD        g20<8,8,1>UD                    { align1 1Q
compacted };
and(1)          a0<1>UD         g1.4<0,1,0>UD   0x000000ffUD    { align1 WE_all
compacted };
or(1)           a0<1>UD         a0<0,1,0>UD     0x0e0b5000UD    { align1 WE_all
};
send(8)         null<1>UW       g22<8,8,1>UD    a0<0,1,0>UD
                            dp data 1 indirect                              {
align1 1Q };
mov(8)          g2<1>UD         0D                              { align1 WE_all
2Q };
mov(8)          g3<1>UD         g32<8,8,1>UD                    { align1 2Q
compacted };
mov(8)          g4<1>UD         g21<8,8,1>UD                    { align1 2Q
compacted };
mov(8)          g5<1>F          g17<8,8,1>F                     { align1 2Q
compacted };
mov(8)          g6<1>F          g17<8,8,1>F                     { align1 2Q
compacted };
mov(8)          g7<1>F          g30<8,8,1>F                     { align1 2Q
compacted };
mov(8)          g8<1>F          1F                              { align1 2Q };
mov(1)          g2.7<1>UD       -1D                             { align1 WE_all
};
and(1)          a0<1>UD         g1.4<0,1,0>UD   0x000000ffUD    { align1 WE_all
};
or(1)           a0<1>UD         a0<0,1,0>UD     0x0e0b6000UD    { align1 WE_all
};
send(8)         null<1>UW       g2<8,8,1>UD     a0<0,1,0>UD
                            dp data 1 indirect                              {
align1 2Q };
mov(8)          g127<1>UD       g0<8,8,1>UD                     { align1 WE_all
1Q compacted };
send(16)        null<1>UW       g127<8,8,1>UD
                            thread_spawner mlen 1 rlen 0                    {
align1 WE_all 1H EOT };
   END B0


I only spent about 1 minute tracking this down, and it wasn't instantly obvious
what is generating this code.  I wanted to make a note of it before I forgot.
:)</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are the QA Contact for the bug.</li>
      </ul>
    </body>
</html>