Mesa (master): aco: remove vmem/smem score statistics

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Mar 11 16:41:21 UTC 2021


Module: Mesa
Branch: master
Commit: 38b2e137665f36f1b0fe58e40971548d9eafa865
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=38b2e137665f36f1b0fe58e40971548d9eafa865

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Feb  3 14:34:09 2021 +0000

aco: remove vmem/smem score statistics

The vmem/smem score statistics have been replaced by the Latency statistic.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8994>

---

 src/amd/compiler/aco_insert_waitcnt.cpp | 135 +-------------------------------
 src/amd/compiler/aco_interface.cpp      |   2 -
 src/amd/compiler/aco_ir.h               |   2 -
 3 files changed, 1 insertion(+), 138 deletions(-)

diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index 4f59f7cdaa7..fdb74cbd9e5 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -108,21 +108,6 @@ uint8_t get_counters_for_event(wait_event ev)
    }
 }
 
-uint16_t get_events_for_counter(counter_type ctr)
-{
-   switch (ctr) {
-   case counter_exp:
-      return exp_events;
-   case counter_lgkm:
-      return lgkm_events;
-   case counter_vm:
-      return vm_events;
-   case counter_vs:
-      return vs_events;
-   }
-   return 0;
-}
-
 struct wait_entry {
    wait_imm imm;
    uint16_t events; /* use wait_event notion */
@@ -207,13 +192,6 @@ struct wait_ctx {
 
    std::map<PhysReg,wait_entry> gpr_map;
 
-   /* used for vmem/smem scores */
-   bool collect_statistics;
-   Instruction *gen_instr;
-   std::map<Instruction *, unsigned> unwaited_instrs[num_counters];
-   std::map<PhysReg,std::set<Instruction *>> reg_instrs[num_counters];
-   std::vector<unsigned> wait_distances[num_events];
-
    wait_ctx() {}
    wait_ctx(Program *program_)
            : program(program_),
@@ -222,8 +200,7 @@ struct wait_ctx {
              max_exp_cnt(6),
              max_lgkm_cnt(program_->chip_class >= GFX10 ? 62 : 14),
              max_vs_cnt(program_->chip_class >= GFX10 ? 62 : 0),
-             unordered_events(event_smem | (program_->chip_class < GFX10 ? event_flat : 0)),
-             collect_statistics(program_->collect_statistics) {}
+             unordered_events(event_smem | (program_->chip_class < GFX10 ? event_flat : 0)) {}
 
    bool join(const wait_ctx* other, bool logical)
    {
@@ -262,56 +239,12 @@ struct wait_ctx {
          barrier_events[i] |= other->barrier_events[i];
       }
 
-      /* these are used for statistics, so don't update "changed" */
-      for (unsigned i = 0; i < num_counters; i++) {
-         for (const auto& instr : other->unwaited_instrs[i]) {
-            using iterator = std::map<Instruction *, unsigned>::iterator;
-            const std::pair<iterator, bool> insert_pair = unwaited_instrs[i].insert(instr);
-            if (!insert_pair.second) {
-               const iterator pos = insert_pair.first;
-               pos->second = std::min(pos->second, instr.second);
-            }
-         }
-         for (const auto& instr_pair : other->reg_instrs[i]) {
-            const PhysReg reg = instr_pair.first;
-            const std::set<Instruction *>& instrs = instr_pair.second;
-            reg_instrs[i][reg].insert(instrs.begin(), instrs.end());
-         }
-      }
-
       return changed;
    }
 
    void wait_and_remove_from_entry(PhysReg reg, wait_entry& entry, counter_type counter) {
-      if (collect_statistics && (entry.counters & counter)) {
-         unsigned counter_idx = ffs(counter) - 1;
-         for (Instruction *instr : reg_instrs[counter_idx][reg]) {
-            auto pos = unwaited_instrs[counter_idx].find(instr);
-            if (pos == unwaited_instrs[counter_idx].end())
-               continue;
-
-            unsigned distance = pos->second;
-            unsigned events = entry.events & get_events_for_counter(counter);
-            while (events) {
-               unsigned event_idx = u_bit_scan(&events);
-               wait_distances[event_idx].push_back(distance);
-            }
-
-            unwaited_instrs[counter_idx].erase(pos);
-         }
-         reg_instrs[counter_idx][reg].clear();
-      }
-
       entry.remove_counter(counter);
    }
-
-   void advance_unwaited_instrs()
-   {
-      for (unsigned i = 0; i < num_counters; i++) {
-         for (std::pair<Instruction * const, unsigned>& instr : unwaited_instrs[i])
-            instr.second++;
-      }
-   }
 };
 
 wait_imm check_instr(Instruction* instr, wait_ctx& ctx)
@@ -661,16 +594,6 @@ void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event
       if (!it.second)
          it.first->second.join(new_entry);
    }
-
-   if (ctx.collect_statistics) {
-      unsigned counters_todo = counters;
-      while (counters_todo) {
-         unsigned i = u_bit_scan(&counters_todo);
-         ctx.unwaited_instrs[i].insert(std::make_pair(ctx.gen_instr, 0u));
-         for (unsigned j = 0; j < rc.size(); j++)
-            ctx.reg_instrs[i][PhysReg{reg.reg()+j}].insert(ctx.gen_instr);
-      }
-   }
 }
 
 void insert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, bool has_sampler=false)
@@ -819,7 +742,6 @@ void handle_block(Program *program, Block& block, wait_ctx& ctx)
       memory_sync_info sync_info = get_sync_info(instr.get());
       queued_imm.combine(kill(instr.get(), ctx, sync_info));
 
-      ctx.gen_instr = instr.get();
       gen(instr.get(), ctx);
 
       if (instr->format != Format::PSEUDO_BARRIER && !is_wait) {
@@ -830,9 +752,6 @@ void handle_block(Program *program, Block& block, wait_ctx& ctx)
          new_instructions.emplace_back(std::move(instr));
 
          queued_imm.combine(perform_barrier(ctx, sync_info, semantic_acquire));
-
-         if (ctx.collect_statistics)
-            ctx.advance_unwaited_instrs();
       }
    }
 
@@ -844,51 +763,6 @@ void handle_block(Program *program, Block& block, wait_ctx& ctx)
 
 } /* end namespace */
 
-static uint32_t calculate_score(std::vector<wait_ctx> &ctx_vec, uint32_t event_mask)
-{
-   double result = 0.0;
-   unsigned num_waits = 0;
-   while (event_mask) {
-      unsigned event_index = u_bit_scan(&event_mask);
-      for (const wait_ctx &ctx : ctx_vec) {
-         for (unsigned dist : ctx.wait_distances[event_index]) {
-            double score = dist;
-            /* for many events, excessive distances provide little benefit, so
-             * decrease the score in that case. */
-            double threshold = INFINITY;
-            double inv_strength = 0.000001;
-            switch (1 << event_index) {
-            case event_smem:
-               threshold = 70.0;
-               inv_strength = 75.0;
-               break;
-            case event_vmem:
-            case event_vmem_store:
-            case event_flat:
-               threshold = 230.0;
-               inv_strength = 150.0;
-               break;
-            case event_lds:
-               threshold = 16.0;
-               break;
-            default:
-               break;
-            }
-            if (score > threshold) {
-               score -= threshold;
-               score = threshold + score / (1.0 + score / inv_strength);
-            }
-
-            /* we don't want increases in high scores to hide decreases in low scores,
-             * so raise to the power of 0.1 before averaging. */
-            result += pow(score, 0.1);
-            num_waits++;
-         }
-      }
-   }
-   return round(pow(result / num_waits, 10.0) * 10.0);
-}
-
 void insert_wait_states(Program* program)
 {
    /* per BB ctx */
@@ -942,13 +816,6 @@ void insert_wait_states(Program* program)
 
       out_ctx[current.index] = std::move(ctx);
    }
-
-   if (program->collect_statistics) {
-      program->statistics[statistic_vmem_score] =
-         calculate_score(out_ctx, event_vmem | event_flat | event_vmem_store);
-      program->statistics[statistic_smem_score] =
-         calculate_score(out_ctx, event_smem);
-   }
 }
 
 }
diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp
index b02c3b29a70..7b4a2f40e5f 100644
--- a/src/amd/compiler/aco_interface.cpp
+++ b/src/amd/compiler/aco_interface.cpp
@@ -41,8 +41,6 @@ static const std::array<aco_compiler_statistic_info, aco::num_statistics> statis
    ret[aco::statistic_inv_throughput] = aco_compiler_statistic_info{"Inverse Throughput", "Estimated busy cycles to execute one wave"};
    ret[aco::statistic_vmem_clauses] = aco_compiler_statistic_info{"VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"};
    ret[aco::statistic_smem_clauses] = aco_compiler_statistic_info{"SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"};
-   ret[aco::statistic_vmem_score] = aco_compiler_statistic_info{"VMEM Score", "Average VMEM def-use distances"};
-   ret[aco::statistic_smem_score] = aco_compiler_statistic_info{"SMEM Score", "Average SMEM def-use distances"};
    ret[aco::statistic_sgpr_presched] = aco_compiler_statistic_info{"Pre-Sched SGPRs", "SGPR usage before scheduling"};
    ret[aco::statistic_vgpr_presched] = aco_compiler_statistic_info{"Pre-Sched VGPRs", "VGPR usage before scheduling"};
    return ret;
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 037f3de3697..ff3f3e63e7b 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1833,8 +1833,6 @@ enum statistic {
    statistic_inv_throughput,
    statistic_vmem_clauses,
    statistic_smem_clauses,
-   statistic_vmem_score,
-   statistic_smem_score,
    statistic_sgpr_presched,
    statistic_vgpr_presched,
    num_statistics



More information about the mesa-commit mailing list