[Mesa-dev] [PATCH 22/22] meta/blit: Use GL_EXT_shader_samples_identical in MSAA-SS resolve blit

Mon Feb 22 21:05:28 UTC 2016

On 02/19/2016 02:57 AM, Iago Toral wrote:
> I don't know much about this, but using shader_samples_identical should
> only give a benefit if we actually get identical samples, otherwise it
> means more work, right?

Yes, it can end up doing more work.  Assuming the backend compiler does
everything right, though, the amount of extra work should be trivial.  The
initial texelFetch fetches both the MCS data and the sample zero data, and
textureSamplesIdenticalEXT also needs the MCS data, so the backend CSE
pass should eliminate the second fetch of the MCS data.  The added
work is then just comparing the MCS data with 0, plus the flow control.

Below is the BDW SIMD8 code for SAMPLES=4.  I see at least 8
instructions that could be removed (all the moves to g124-g127).
Without GL_EXT_shader_samples_identical, none of those extra
instructions are there.  I'll play around with the GLSL to see if I can
get the backend to generate better code.  Otherwise, we'll have to see
about fixing the backend.

SIMD8 shader: 51 instructions. 0 loops. 170 cycles. 0:0 spills:fills. Promoted 0 constants. Compacted 816 to 544 bytes (33%)
   START B0
pln(8)          g34<1>F         g4<0,1,0>F      g2<8,8,1>F      { align1 1Q compacted };
pln(8)          g35<1>F         g4.4<0,1,0>F    g2<8,8,1>F      { align1 1Q compacted };
mov(8)          g6<1>F          [0F, 0F, 0F, 0F]VF              { align1 1Q compacted };
mov(8)          g7<1>F          [0F, 0F, 0F, 0F]VF              { align1 1Q compacted };
mov(8)          g8<1>D          g34<8,8,1>F                     { align1 1Q compacted };
mov(8)          g9<1>D          g35<8,8,1>F                     { align1 1Q compacted };
send(8)         g2<1>UW         g6<8,8,1>F
                            sampler ld2dms SIMD8 Surface = 1 Sampler = 0 mlen 4 rlen 4 { align1 1Q };
mov(8)          g124<1>F        g2<8,8,1>F                      { align1 1Q compacted };
mov(8)          g125<1>F        g3<8,8,1>F                      { align1 1Q compacted };
mov(8)          g126<1>F        g4<8,8,1>F                      { align1 1Q compacted };
mov(8)          g127<1>F        g5<8,8,1>F                      { align1 1Q compacted };
mov.nz.f0(8)    null<1>UD       0x00000000UD                    { align1 1Q };
(+f0) if(8)     JIP: 32         UIP: 392                        { align1 1Q };
   END B0 ->B1 ->B2
   START B1 <-B0
else(8)         JIP: 376        UIP: 376                        { align1 1Q };
   END B1 ->B3
   START B2 <-B0
mov(8)          g10<1>F         1.4013e-45F                     { align1 1Q };
mov(8)          g11<1>F         [0F, 0F, 0F, 0F]VF              { align1 1Q compacted };
mov(8)          g12<1>F         g8<8,8,1>F                      { align1 1Q compacted };
mov(8)          g13<1>F         g9<8,8,1>F                      { align1 1Q compacted };
mov(8)          g14<1>F         2.8026e-45F                     { align1 1Q };
mov(8)          g15<1>F         [0F, 0F, 0F, 0F]VF              { align1 1Q compacted };
mov(8)          g16<1>F         g8<8,8,1>F                      { align1 1Q compacted };
mov(8)          g17<1>F         g9<8,8,1>F                      { align1 1Q compacted };
mov(8)          g18<1>F         4.2039e-45F                     { align1 1Q };
mov(8)          g19<1>F         [0F, 0F, 0F, 0F]VF              { align1 1Q compacted };
mov(8)          g20<1>F         g8<8,8,1>F                      { align1 1Q compacted };
mov(8)          g21<1>F         g9<8,8,1>F                      { align1 1Q compacted };
send(8)         g6<1>UW         g10<8,8,1>F
                            sampler ld2dms SIMD8 Surface = 1 Sampler = 0 mlen 4 rlen 4 { align1 1Q };
send(8)         g10<1>UW        g14<8,8,1>F
                            sampler ld2dms SIMD8 Surface = 1 Sampler = 0 mlen 4 rlen 4 { align1 1Q };
send(8)         g14<1>UW        g18<8,8,1>F
                            sampler ld2dms SIMD8 Surface = 1 Sampler = 0 mlen 4 rlen 4 { align1 1Q };
add(8)          g18<1>F         g2<8,8,1>F      g6<8,8,1>F      { align1 1Q compacted };
add(8)          g22<1>F         g3<8,8,1>F      g7<8,8,1>F      { align1 1Q compacted };
add(8)          g26<1>F         g4<8,8,1>F      g8<8,8,1>F      { align1 1Q compacted };
add(8)          g30<1>F         g5<8,8,1>F      g9<8,8,1>F      { align1 1Q compacted };
add(8)          g19<1>F         g10<8,8,1>F     g14<8,8,1>F     { align1 1Q compacted };
add(8)          g23<1>F         g11<8,8,1>F     g15<8,8,1>F     { align1 1Q compacted };
add(8)          g27<1>F         g12<8,8,1>F     g16<8,8,1>F     { align1 1Q compacted };
add(8)          g31<1>F         g13<8,8,1>F     g17<8,8,1>F     { align1 1Q compacted };
add(8)          g20<1>F         g18<8,8,1>F     g19<8,8,1>F     { align1 1Q compacted };
add(8)          g24<1>F         g22<8,8,1>F     g23<8,8,1>F     { align1 1Q compacted };
add(8)          g28<1>F         g26<8,8,1>F     g27<8,8,1>F     { align1 1Q compacted };
add(8)          g32<1>F         g30<8,8,1>F     g31<8,8,1>F     { align1 1Q compacted };
mul(8)          g21<1>F         g20<8,8,1>F     0.25F           { align1 1Q };
mul(8)          g25<1>F         g24<8,8,1>F     0.25F           { align1 1Q };
mul(8)          g29<1>F         g28<8,8,1>F     0.25F           { align1 1Q };
mul(8)          g33<1>F         g32<8,8,1>F     0.25F           { align1 1Q };
mov(8)          g124<1>UD       g21<8,8,1>UD                    { align1 1Q compacted };
mov(8)          g125<1>UD       g25<8,8,1>UD                    { align1 1Q compacted };
mov(8)          g126<1>UD       g29<8,8,1>UD                    { align1 1Q compacted };
mov(8)          g127<1>UD       g33<8,8,1>UD                    { align1 1Q compacted };
   END B2 ->B3
   START B3 <-B2 <-B1
endif(8)        JIP: 16                                         { align1 1Q };
sendc(8)        null<1>UW       g124<8,8,1>F
                            render RT write SIMD8 LastRT Surface = 0 mlen 4 rlen 0 { align1 1Q EOT };
nop                                                             ;
   END B3

> I noticed that the test renders a quad with
> random colors for each vertex that will be interpolated across the
> region, so could it be that we are hitting few cases of identical
> samples? Shouldn't the results be expected to be better if we rendered a
> flat color instead?