[Mesa-dev] [PATCH 8/8] mesa: Micro-optimize _mesa_is_valid_prim_mode

Ian Romanick idr at freedesktop.org
Mon Dec 22 14:18:31 PST 2014


On 12/21/2014 11:33 PM, Kenneth Graunke wrote:
> On Friday, December 19, 2014 02:20:59 PM Ian Romanick wrote:
>> From: Ian Romanick <ian.d.romanick at intel.com>
>>
>> You would not believe the mess GCC 4.8.3 generated for the old
>> switch-statement.
>>
>> On Bay Trail-D using Fedora 20 compile flags (-m64 -O2 -mtune=generic
>> for 64-bit and -m32 -march=i686 -mtune=atom for 32-bit), affects
>> Gl32Batch7:
>>
>> 32-bit: Difference at 95.0% confidence -0.37374% +/- 0.184057% (n=40)
>> 64-bit: Difference at 95.0% confidence 0.966722% +/- 0.338442% (n=40)
>>
>> The regression on 32-bit is odd.  Callgrind says the caller,
>> _mesa_is_valid_prim_mode is faster.  Before it says 2,293,760
>> cycles, and after it says 917,504.
>>
>> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
>> ---
>>  src/mesa/main/api_validate.c | 30 ++++++++++++------------------
>>  1 file changed, 12 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
>> index b882f0e..9c2e29e 100644
>> --- a/src/mesa/main/api_validate.c
>> +++ b/src/mesa/main/api_validate.c
>> @@ -113,27 +113,21 @@ check_valid_to_render(struct gl_context *ctx, const char *function)
>>  bool
>>  _mesa_is_valid_prim_mode(struct gl_context *ctx, GLenum mode)
>>  {
>> -   switch (mode) {
>> -   case GL_POINTS:
>> -   case GL_LINES:
>> -   case GL_LINE_LOOP:
>> -   case GL_LINE_STRIP:
>> -   case GL_TRIANGLES:
>> -   case GL_TRIANGLE_STRIP:
>> -   case GL_TRIANGLE_FAN:
>> +   /* The overwhelmingly common case is (mode <= GL_TRIANGLE_FAN).  Test that
>> +    * first and exit.  You would think that a switch-statement would be the
>> +    * right approach, but at least GCC 4.7.2 generates some pretty dire code
> 
> Perhaps update this to 4.8.3 to match your commit message?
> 
>> +    * for the common case.
>> +    */
>> +   if (likely(mode <= GL_TRIANGLE_FAN))
> 
> I'm not sure if likely() is a good idea.  My understanding is the penalty for
> getting it wrong is pretty hefty, and the other modes do happen.

They do get punished, but I think we want them to be punished.  With
the likely(), we get the following on 32-bit:

00028620 <_mesa_is_valid_prim_mode>:
   28620:       83 7c 24 08 06          cmpl   $0x6,0x8(%esp)
   28625:       77 06                   ja     2862d <_mesa_is_valid_prim_mode+0xd>
   28627:       b8 01 00 00 00          mov    $0x1,%eax
   2862c:       c3                      ret    
   2862d:       83 7c 24 08 09          cmpl   $0x9,0x8(%esp)
   28632:       76 3e                   jbe    28672 <_mesa_is_valid_prim_mode+0x52>
   28634:       31 c0                   xor    %eax,%eax
   28636:       83 7c 24 08 0d          cmpl   $0xd,0x8(%esp)
   2863b:       77 ef                   ja     2862c <_mesa_is_valid_prim_mode+0xc>
   2863d:       8b 44 24 04             mov    0x4(%esp),%eax
   28641:       8b 50 04                mov    0x4(%eax),%edx
   28644:       83 fa 03                cmp    $0x3,%edx
   28647:       74 06                   je     2864f <_mesa_is_valid_prim_mode+0x2f>
   28649:       31 c0                   xor    %eax,%eax
   2864b:       85 d2                   test   %edx,%edx
   2864d:       75 1f                   jne    2866e <_mesa_is_valid_prim_mode+0x4e>
   2864f:       8b 4c 24 04             mov    0x4(%esp),%ecx
   28653:       b8 01 00 00 00          mov    $0x1,%eax
   28658:       83 b9 00 11 00 00 1f    cmpl   $0x1f,0x1100(%ecx)
   2865f:       77 0d                   ja     2866e <_mesa_is_valid_prim_mode+0x4e>
   28661:       80 b9 81 10 00 00 00    cmpb   $0x0,0x1081(%ecx)
   28668:       0f 95 c0                setne  %al
   2866b:       0f b6 c0                movzbl %al,%eax
   2866e:       83 e0 01                and    $0x1,%eax
   28671:       c3                      ret    
   28672:       8b 44 24 04             mov    0x4(%esp),%eax
   28676:       83 78 04 00             cmpl   $0x0,0x4(%eax)
   2867a:       0f 94 c0                sete   %al
   2867d:       c3                      ret    

Without the likely(), we get:

00028620 <_mesa_is_valid_prim_mode>:
   28620:       8b 54 24 08             mov    0x8(%esp),%edx
   28624:       b8 01 00 00 00          mov    $0x1,%eax
   28629:       83 fa 06                cmp    $0x6,%edx
   2862c:       76 42                   jbe    28670 <_mesa_is_valid_prim_mode+0x50>
   2862e:       83 fa 09                cmp    $0x9,%edx
   28631:       76 45                   jbe    28678 <_mesa_is_valid_prim_mode+0x58>
   28633:       31 c0                   xor    %eax,%eax
   28635:       83 fa 0d                cmp    $0xd,%edx
   28638:       77 36                   ja     28670 <_mesa_is_valid_prim_mode+0x50>
   2863a:       8b 44 24 04             mov    0x4(%esp),%eax
   2863e:       8b 40 04                mov    0x4(%eax),%eax
   28641:       83 f8 03                cmp    $0x3,%eax
   28644:       0f 94 c2                sete   %dl
   28647:       85 c0                   test   %eax,%eax
   28649:       0f 94 c0                sete   %al
   2864c:       08 d0                   or     %dl,%al
   2864e:       74 20                   je     28670 <_mesa_is_valid_prim_mode+0x50>
   28650:       8b 4c 24 04             mov    0x4(%esp),%ecx
   28654:       83 b9 00 11 00 00 1f    cmpl   $0x1f,0x1100(%ecx)
   2865b:       77 13                   ja     28670 <_mesa_is_valid_prim_mode+0x50>
   2865d:       80 b9 81 10 00 00 00    cmpb   $0x0,0x1081(%ecx)
   28664:       0f 95 c0                setne  %al
   28667:       89 f6                   mov    %esi,%esi
   28669:       8d bc 27 00 00 00 00    lea    0x0(%edi,%eiz,1),%edi
   28670:       c3                      ret    
   28671:       8d b4 26 00 00 00 00    lea    0x0(%esi,%eiz,1),%esi
   28678:       8b 44 24 04             mov    0x4(%esp),%eax
   2867c:       8b 40 04                mov    0x4(%eax),%eax
   2867f:       85 c0                   test   %eax,%eax
   28681:       0f 94 c0                sete   %al
   28684:       c3                      ret    

Let me try a couple of things and take some measurements.

> Getting rid of the switch statement makes a lot of sense.  I like the new
> approach.
> 
> With likely removed, this is:
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
> 
>>        return true;
>> -   case GL_QUADS:
>> -   case GL_QUAD_STRIP:
>> -   case GL_POLYGON:
>> +
>> +   if (mode <= GL_POLYGON)
>>        return (ctx->API == API_OPENGL_COMPAT);
>> -   case GL_LINES_ADJACENCY:
>> -   case GL_LINE_STRIP_ADJACENCY:
>> -   case GL_TRIANGLES_ADJACENCY:
>> -   case GL_TRIANGLE_STRIP_ADJACENCY:
>> +
>> +   if (mode <= GL_TRIANGLE_STRIP_ADJACENCY)
>>        return _mesa_has_geometry_shaders(ctx);
>> -   default:
>> -      return false;
>> -   }
>> +
>> +   return false;
>>  }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20141222/43028592/attachment.sig>


More information about the mesa-dev mailing list