[Mesa-dev] [PATCH 2/5] radeonsi: set dereferenceable attribute on descriptor arrays

Marek Olšák maraeo at gmail.com
Tue Jul 12 20:52:35 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

This allows moving the loads arbitrarily in the Sinking pass.

26002 shaders in 14643 tests
Totals:
SGPRS: 2080160 -> 2080160 (0.00 %)
VGPRS: 798875 -> 797826 (-0.13 %)
Spilled SGPRs: 108485 -> 79165 (-27.03 %)
Spilled VGPRs: 327 -> 327 (0.00 %)
Scratch VGPRs: 1656 -> 1652 (-0.24 %) dwords per thread
Code Size: 36127192 -> 35559780 (-1.57 %) bytes
LDS: 767 -> 767 (0.00 %) blocks
Max Waves: 212464 -> 212672 (0.10 %)
Wait states: 0 -> 0 (0.00 %)

 PERCENTAGES / App    Shaders    SGPRs     VGPRs  SpillSGPR SpillVGPR  Scratch   CodeSize  MaxWaves    Waits
 (unknown)                  4     .         .         .         .         .         .         .         .
 0ad                        6     .         .         .         .         .         .         .         .
 alien_isolation         2938     .        0.04 %   -8.53 %     .         .       -0.71 %   -0.06 %     .
 anholt                    10     .         .         .         .         .         .         .         .
 batman_arkham_origins    589     .       -0.58 %  -79.54 %     .         .       -6.72 %    0.57 %     .
 bioshock-infinite       1769     .       -0.65 %  -89.32 %     .         .       -4.73 %    0.48 %     .
 borderlands2            3968     .       -0.31 %  -51.21 %     .         .       -4.09 %    0.22 %     .
 brutal-legend            338     .       -0.03 %   -2.95 %     .         .       -0.06 %     .         .
 civilization_beyond..    116     .         .      -14.17 %     .         .       -0.88 %     .         .
 counter_strike_glob..   1142     .         .         .         .         .         .         .         .
 dirt-showdown            541     .       -0.56 %  -40.14 %     .       -3.45 %   -1.82 %    0.35 %     .
 dolphin                   22     .         .         .         .         .        0.16 %     .         .
 dota2                   1747     .         .         .         .         .        0.01 %     .         .
 europa_universalis_4      76     .       -0.23 %  -42.11 %     .         .       -0.96 %     .         .
 f1-2015                  774     .       -0.09 %  -28.89 %     .         .       -2.60 %    0.09 %     .
 furmark-0.7.0              4     .         .         .         .         .         .         .         .
 gimark-0.7.0              10     .         .         .         .         .         .         .         .
 glamor                    16     .         .         .         .         .         .         .         .
 humus-celshading           4     .         .         .         .         .         .         .         .
 humus-domino               6     .         .         .         .         .         .         .         .
 humus-dynamicbranching    24     .        0.71 %     .         .         .        0.29 %   -0.45 %     .
 humus-hdr                 10     .         .         .         .         .         .         .         .
 humus-portals              2     .         .         .         .         .         .         .         .
 humus-volumetricfog..      6     .         .         .         .         .         .         .         .
 left_4_dead_2           1762     .         .         .         .         .         .         .         .
 metro_2033_redux        2670     .       -0.10 %   -7.15 %     .         .       -0.03 %     .         .
 nexuiz                    80     .         .         .         .         .         .         .         .
 pixmark-julia-fp32         2     .         .         .         .         .         .         .         .
 pixmark-julia-fp64         2     .         .         .         .         .         .         .         .
 pixmark-piano-0.7.0        2     .         .         .         .         .         .         .         .
 pixmark-volplosion-..      2     .         .         .         .         .         .         .         .
 plot3d-0.7.0               8     .         .         .         .         .         .         .         .
 portal                   474     .         .         .         .         .         .         .         .
 sauerbraten                7     .         .         .         .         .         .         .         .
 serious_sam_3_bfe        392     .         .      -13.20 %     .         .       -1.81 %     .         .
 supertuxkart               4     .         .         .         .         .         .         .         .
 talos_principle          324     .       -0.21 %  -18.39 %     .         .       -2.73 %    0.14 %     .
 team_fortress_2          808     .         .         .         .         .         .         .         .
 tesseract                430     .        0.08 %  -68.57 %     .         .       -0.45 %     .         .
 tessmark-0.7.0             6     .         .         .         .         .         .         .         .
 thea                     172     .         .         .         .         .        0.03 %     .         .
 ue4_effects_cave         299     .       -0.04 %  -10.15 %     .         .       -0.25 %    0.04 %     .
 ue4_elemental            586     .       -0.02 %  -13.93 %     .         .       -0.13 %    0.02 %     .
 ue4_lightroom_inter..     74     .       -0.17 %  -70.00 %     .         .       -1.27 %     .         .
 ue4_realistic_rende..     92     .         .      -32.58 %     .         .       -0.35 %     .         .
 unigine_heaven           322     .        0.12 %  -54.17 %     .         .       -1.42 %   -0.12 %     .
 unigine_sanctuary        264     .         .         .         .         .         .         .         .
 unigine_tropics          210     .         .         .         .         .         .         .         .
 unigine_valley           278     .       -0.15 %  -40.74 %     .         .       -2.00 %    0.09 %     .
 unity                     72     .         .         .         .         .        0.03 %     .         .
 warsow                   176     .         .         .         .         .         .         .         .
 warzone2100                4     .         .         .         .         .        0.13 %     .         .
 witcher2                1040     .       -0.03 %  -86.28 %     .         .       -0.28 %    0.01 %     .
 xcom_enemy_within       1236     .       -0.24 %  -63.54 %     .         .       -0.93 %    0.18 %     .
 yofrankie                 82     .       -0.61 % -100.00 %     .         .       -0.83 %    0.41 %     .
 -----------------------------------------------------------------------------------------------------------
 Total                  26002     .       -0.13 %  -27.03 %     .       -0.24 %   -1.57 %    0.10 %     .
---
 src/gallium/drivers/radeonsi/si_shader.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 31140b1..b23c7c6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -33,6 +33,7 @@
 #include "gallivm/lp_bld_arit.h"
 #include "gallivm/lp_bld_bitarit.h"
 #include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_misc.h"
 #include "radeon/r600_cs.h"
 #include "radeon/radeon_llvm.h"
 #include "radeon/radeon_elf_util.h"
@@ -5311,11 +5312,17 @@ static void si_create_function(struct si_shader_context *ctx,
 	for (i = 0; i <= last_sgpr; ++i) {
 		LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
 
-		/* We tell llvm that array inputs are passed by value to allow Sinking pass
-		 * to move load. Inputs are constant so this is fine. */
-		if (i <= last_array_pointer)
+		/* The combination of:
+		 * - ByVal
+		 * - dereferenceable
+		 * - tbaa
+		 * allows the optimization passes to move loads and reduces
+		 * SGPR spilling significantly.
+		 */
+		if (i <= last_array_pointer) {
 			LLVMAddAttribute(P, LLVMByValAttribute);
-		else
+			lp_add_attr_dereferenceable(P, UINT64_MAX);
+		} else
 			LLVMAddAttribute(P, LLVMInRegAttribute);
 	}
 
-- 
2.7.4



More information about the mesa-dev mailing list