[Mesa-dev] r600g shader optimization
Vadim Girlin
vadimgirlin at gmail.com
Fri Oct 7 07:14:34 PDT 2011
Hi,
Recently I've been working on shader optimization for r600g, and I now
have an initial working implementation of a simple ALU scheduler and
register allocator. It causes no piglit regressions, though it's still a
work in progress and there are known issues with some applications.
I've pushed the working branch to github:
https://github.com/VadimGirlin/mesa/tree/r600_shader_opt
Currently it supports Evergreen only, but I'm planning to make it work
with other chips too. It uses "struct r600_bytecode" as the source,
converting it to an SSA-based internal representation. I'm going to
implement some optimization passes at that stage, but for now it
proceeds directly to the final steps - register allocation, ALU
scheduling, and building the new bytecode.
As an example, I'm attaching the dump for one of the shaders in
glxgears. You can get such a dump for all shaders, before and after
processing, by setting the R600_OPT_DUMP environment variable to 2. Setting
this variable to 1 will print only summary information for the processed
shaders - size, number of GPRs, and number of ALU instruction groups.
Vadim
-------------- next part --------------
bytecode 130 dw -- 11 gprs ---------------------
E
0000 00000000 CF ADDR:0
0001 84C00000 CF INST:13 CALL_FS COND:0 POP_COUNT:0
0002 80000004 ALU ADDR:8 KCACHE_MODE0:2 KCACHE_BANK0:0 KCACHE_BANK1:0
0003 A0F00000 ALU INST:0x8 ALU KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:61
1 MUL R5.x, R1.x, KC0[0].x
MUL R5.y, R1.x, KC0[0].y
MUL R5.z, R1.x, KC0[0].z
MUL R5.w, R1.x, KC0[0].w
2 MULADD R5.x, R1.y, KC0[1].x, PV.x
MULADD R5.y, R1.y, KC0[1].y, PV.y
MULADD R5.z, R1.y, KC0[1].z, PV.z
MULADD R5.w, R1.y, KC0[1].w, PV.w
3 MULADD R5.x, R1.z, KC0[2].x, PV.x
MULADD R5.y, R1.z, KC0[2].y, PV.y
MULADD R5.z, R1.z, KC0[2].z, PV.z
MULADD R5.w, R1.z, KC0[2].w, PV.w
4 MULADD R3.x, R1.w, KC0[3].x, PV.x
MULADD R3.y, R1.w, KC0[3].y, PV.y
MULADD R3.z, R1.w, KC0[3].z, PV.z
MULADD R3.w, R1.w, KC0[3].w, PV.w
5 DOT4 R6.x, R2.x, R2.x
DOT4 __.y, R2.y, R2.y
DOT4 __.z, R2.z, R2.z
DOT4 __.w, 0.0f, 0.0f
6 RECIPSQRT_CLAMPED R10.x, |PV.x|
7 MOV R6.x, PS
MOV __.y, PS
MOV __.z, PS
MOV __.w, PS
8 MUL R5.x, R2.x, PV.x
MUL R5.y, R2.y, PV.x
MUL R5.z, R2.z, PV.x
MUL R5.w, R2.w, PV.x
9 MOV R7.x, KC0[4].x
MOV R7.y, KC0[4].y
MOV R7.z, KC0[4].z
MOV R7.w, KC0[4].w
10 MOV R4.x, PV.x
MOV R4.y, PV.y
MOV R4.z, PV.z
MOV R4.w, PV.w
11 DOT4 R8.x, R5.x, KC0[5].x
DOT4 R8.y, R5.y, KC0[5].y
DOT4 R8.z, R5.z, KC0[5].z
DOT4 R8.w, 0.0f, 0.0f
12 MAX R6.x, 0.0f, PV.x
MAX R6.y, 0.0f, PV.x
MAX R6.z, 0.0f, PV.x
MAX R6.w, 1.0f, PV.x
SETGT R6.z, PV.x, 0.0f
13 ADD R7.x, KC0[6].x, R7.x
ADD R7.y, KC0[6].y, R7.y
ADD R7.z, KC0[6].z, R7.z
ADD R7.w, KC0[6].w, R7.w
14 MULADD R7.x, R6.y, KC0[7].x, PV.x
MULADD R7.y, R6.y, KC0[7].y, PV.y
MULADD R7.z, R6.y, KC0[7].z, PV.z
MULADD R7.w, R6.y, KC0[7].w, PV.w
15 MULADD R4.x, R6.z, KC0[8].x, PV.x
MULADD R4.y, R6.z, KC0[8].y, PV.y
MULADD R4.z, R6.z, KC0[8].z, PV.z
16 MOV_sat R4.x, PV.x
MOV_sat R4.y, PV.y
MOV_sat R4.z, PV.z
MOV_sat R4.w, R4.w
EXPORT_DONE POS 60, R3.xyzw
EXPORT_DONE PARAM 0, R4.xyzw
--------------------------------------
optimizing shader 7
INFO: shader optimized : size -27.7% ( 130 -> 94 dw), gpr -63.6% ( 11 -> 4 ), alu_groups -31.2% ( 16 -> 11 )
optimized bytecode 94 dw -- 4 gprs ---------------------
E
0000 00000000 CF ADDR:0
0001 84C00000 CF INST:13 CALL_FS COND:0 POP_COUNT:0
0002 80000004 ALU ADDR:8 KCACHE_MODE0:2 KCACHE_BANK0:0 KCACHE_BANK1:0
0003 A0A80000 ALU INST:0x8 ALU KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:43
1 MUL R0.x, R1.x, KC0[0].x
MUL R0.y, R1.x, KC0[0].y
2 MULADD R0.x, R1.y, KC0[1].x, PV.x
MULADD R0.y, R1.y, KC0[1].y, PV.y
MUL R0.z, R1.x, KC0[0].z
MUL R0.w, R1.x, KC0[0].w
3 MULADD R1.x, R1.z, KC0[2].x, PV.x
MULADD R0.z, R1.y, KC0[1].z, PV.z
MULADD R0.w, R1.y, KC0[1].w, PV.w
4 DOT4 R0.x, R2.x, R2.x
DOT4 __.y, R2.y, R2.y
DOT4 __.z, R2.z, R2.z
DOT4 __.w, 0.0f, 0.0f
MULADD R0.y, R1.z, KC0[2].y, R0.y
5 MULADD R1.x, R1.w, KC0[3].x, R1.x
MULADD R0.z, R1.z, KC0[2].z, R0.z
MULADD R0.w, R1.z, KC0[2].w, R0.w
RECIPSQRT_CLAMPED R0.x, |PV.x|
6 MUL R2.x, R2.x, PS
MUL R2.y, R2.y, PS
MULADD R1.z, R1.w, KC0[3].z, PV.z
MULADD R1.w, R1.w, KC0[3].w, PV.w
MULADD R1.y, R1.w, KC0[3].y, R0.y
7 MOV R0.x, KC0[4].y
MOV R0.y, KC0[4].z
MOV R0.z, KC0[4].x
MOV_sat R0.w, KC0[4].w
MUL R2.z, R2.z, R0.x
8 DOT4 R3.x, R2.x, KC0[5].x
DOT4 __.y, R2.y, KC0[5].y
DOT4 __.z, PS, KC0[5].z
DOT4 __.w, 0.0f, 0.0f
9 ADD R0.x, KC0[6].z, R0.y
ADD R0.y, KC0[6].y, R0.x
ADD R0.z, KC0[6].x, R0.z
MAX R2.x, 0.0f, PV.x
10 MULADD R0.x, PS, KC0[7].z, PV.x
SETGT R0.y, R3.x, 0.0f
MULADD R0.z, PS, KC0[7].y, PV.y
MULADD R2.x, PS, KC0[7].x, PV.z
11 MULADD_sat R0.x, PV.y, KC0[8].x, PS
MULADD_sat R0.y, PV.y, KC0[8].y, PV.z
MULADD_sat R0.z, PV.y, KC0[8].z, PV.x
EXPORT_DONE POS 60, R1.xyzw
EXPORT_DONE PARAM 0, R0.xyzw
--------------------------------------
More information about the mesa-dev
mailing list