Mesa (main): aco/spill: Prefer unordered_map over map for next use distances

Fri Oct 1 09:57:11 UTC 2021

Module: Mesa
Branch: main
Commit: 6650799ea5c45272a59a0b764e20b8d7a6ac44fc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6650799ea5c45272a59a0b764e20b8d7a6ac44fc

Author: Tony Wasserka <tony.wasserka at gmx.de>
Date:   Tue Jul 13 12:31:12 2021 +0200

aco/spill: Prefer unordered_map over map for next use distances

This changes the iteration order of next use distances, so some "random"
changes to shader metrics are expected.

fossil-db for Navi14:
Totals from 1239 (0.82% of 150305) affected shaders:
SpillSGPRs: 10559 -> 10562 (+0.03%); split: -0.05%, +0.08%
SpillVGPRs: 1632 -> 1863 (+14.15%)
CodeSize: 19321468 -> 19304164 (-0.09%); split: -0.09%, +0.01%
Instrs: 3593957 -> 3591647 (-0.06%); split: -0.07%, +0.01%
Latency: 103120695 -> 102475647 (-0.63%); split: -0.63%, +0.01%
InvThroughput: 23897614 -> 23575320 (-1.35%); split: -1.36%, +0.02%
VClause: 66406 -> 66943 (+0.81%); split: -0.01%, +0.81%
SClause: 118559 -> 118548 (-0.01%)
Copies: 310871 -> 308950 (-0.62%); split: -0.69%, +0.08%
Branches: 123386 -> 123413 (+0.02%); split: -0.00%, +0.03%

These numbers mostly come from parallel-rdp ubershaders. Small changes are
also found in the rdr2 and rage2 shader metrics, whereas shader metrics
elsewhere are not significantly affected.

Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11925>

---

 src/amd/compiler/aco_spill.cpp | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 9367db9e160..35a532dea72 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -29,12 +29,25 @@
 #include "common/sid.h"
 
 #include <algorithm>
+#include <cstring>
 #include <map>
 #include <set>
 #include <stack>
+#include <unordered_map>
 #include <unordered_set>
 #include <vector>
 
+namespace std {
+template <> struct hash<aco::Temp> { /* std::hash specialization so aco::Temp can key an unordered_map */
+   size_t operator()(aco::Temp temp) const noexcept
+   {
+      uint32_t v; /* integer that receives temp's object bytes */
+      std::memcpy(&v, &temp, sizeof(temp)); /* NOTE(review): assumes sizeof(aco::Temp) == 4 - confirm */
+      return std::hash<uint32_t>{}(v); /* hash the copied bytes as a plain uint32_t */
+   }
+};
+} // namespace std
+
 /*
  * Implements the spilling algorithm on SSA-form from
  * "Register Spilling and Live-Range Splitting for SSA-Form Programs"
@@ -59,8 +72,8 @@ struct spill_ctx {
 
    std::vector<bool> processed;
    std::stack<Block*, std::vector<Block*>> loop_header;
-   std::vector<std::map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_start;
-   std::vector<std::map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_end;
+   std::vector<std::unordered_map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_start;
+   std::vector<std::unordered_map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_end;
    std::vector<std::vector<std::pair<Temp, uint32_t>>> local_next_use_distance; /* Working buffer */
    std::vector<std::pair<RegClass, std::unordered_set<uint32_t>>> interferences;
    std::vector<std::vector<uint32_t>> affinities;
@@ -158,11 +171,12 @@ void
 next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set<uint32_t>& worklist)
 {
    Block* block = &ctx.program->blocks[block_idx];
-   std::map<Temp, std::pair<uint32_t, uint32_t>> next_uses = ctx.next_use_distances_end[block_idx];
+   std::unordered_map<Temp, std::pair<uint32_t, uint32_t>> next_uses =
+      ctx.next_use_distances_end[block_idx];
 
    /* to compute the next use distance at the beginning of the block, we have to add the block's
     * size */
-   for (std::map<Temp, std::pair<uint32_t, uint32_t>>::iterator it = next_uses.begin();
+   for (std::unordered_map<Temp, std::pair<uint32_t, uint32_t>>::iterator it = next_uses.begin();
         it != next_uses.end(); ++it)
       it->second.second = it->second.second + block->instructions.size();