Mesa (staging/21.3): intel/perf: Store indices to strings rather than pointers

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue May 10 06:58:50 UTC 2022


Module: Mesa
Branch: staging/21.3
Commit: 063863ddd63e5a8f3a5bdeb55e51e1a391b3630d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=063863ddd63e5a8f3a5bdeb55e51e1a391b3630d

Author: Matt Turner <mattst88 at gmail.com>
Date:   Mon Jan 31 13:16:26 2022 -0800

intel/perf: Store indices to strings rather than pointers

The compiler does a good job of deduplicating strings already, but we
can eliminate the pointers to each string by combining the strings into
a single char array and storing only an index into that array.

The longest of the char arrays is the descriptions array, which is a
little over 45 KiB, so it is still under MSVC's 64 KiB string literal
limit [0]. Because every array is shorter than 64 KiB, we can use uint16_t
as the index type, which roughly doubles our savings compared to an int.

This cuts 77 KiB from each of iris_dri.so (0.5%) and libvulkan_intel.so (0.9%).

   text    data     bss     dec     hex filename
 926811   25920       0  952731   e899b meson-generated_.._intel_perf_metrics.c.o (before)
 924401       0       0  924401   e1af1 meson-generated_.._intel_perf_metrics.c.o (after)

   text    data     bss     dec     hex filename
14190852 391628  210004 14792484 e1b724 iris_dri.so (before)
14137732 365708  210004 14713444 e08264 iris_dri.so (after)

   text    data     bss     dec     hex filename
8184097  240184   22820 8447101  80e47d libvulkan_intel.so (before)
8131009  214264   22820 8368093  7fafdd libvulkan_intel.so (after)

relinfo:
iris_dri.so (before): 17765 relocations, 17545 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users
iris_dri.so (after) : 15605 relocations, 15385 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users

libvulkan_intel.so (before): 10720 relocations, 6989 relative (65%), 355 PLT entries, 1 for local syms (0%), 0 users
libvulkan_intel.so (after) :  8560 relocations, 4829 relative (56%), 355 PLT entries, 1 for local syms (0%), 0 users

[0] https://docs.microsoft.com/en-us/cpp/cpp/string-and-character-literals-cpp?view=msvc-170&viewFallbackFrom=vs-2019

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
(cherry picked from commit 6c0246dcf4f2d4e2ccdaa97d52833cf9f11ffa4b)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16405>

---

 src/intel/perf/gen_perf.py        | 74 +++++++++++++++++++++++++++++++++------
 src/intel/perf/intel_perf_setup.h |  8 ++---
 2 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py
index b7ffbf88aea..0666f629163 100644
--- a/src/intel/perf/gen_perf.py
+++ b/src/intel/perf/gen_perf.py
@@ -20,6 +20,7 @@
 # IN THE SOFTWARE.
 
 import argparse
+import builtins
 import collections
 import os
 import sys
@@ -414,7 +415,9 @@ def counter_key(counter):
     return counter_key_tuple._make([counter.get(field) for field in counter_key_tuple._fields])
 
 
-def output_counter_struct(set, counter, idx):
+def output_counter_struct(set, counter, idx,
+                          name_to_idx, desc_to_idx,
+                          symbol_name_to_idx, category_to_idx):
     data_type = counter.data_type
     data_type_uc = data_type.upper()
 
@@ -426,10 +429,10 @@ def output_counter_struct(set, counter, idx):
 
     c("[" + str(idx) + "] = {\n")
     c_indent(3)
-    c(".name = \"" + counter.name + "\",\n")
-    c(".desc = \"" + counter.description + " " + desc_units(counter.units) + "\",\n")
-    c(".symbol_name = \"" + counter.symbol_name + "\",\n")
-    c(".category = \"" + counter.mdapi_group + "\",\n")
+    c(".name_idx = " + str(name_to_idx[counter.name]) + ",\n")
+    c(".desc_idx = " + str(desc_to_idx[counter.description + " " + desc_units(counter.units)]) + ",\n")
+    c(".symbol_name_idx = " + str(symbol_name_to_idx[counter.symbol_name]) + ",\n")
+    c(".category_idx = " + str(category_to_idx[counter.mdapi_group]) + ",\n")
     c(".type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ",\n")
     c(".data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ",\n")
     c(".units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.units) + ",\n")
@@ -476,6 +479,29 @@ def output_counter_report(set, counter, counter_to_idx, current_offset):
     return current_offset + sizeof(c_type)
 
 
+def str_to_idx_table(strs):
+    sorted_strs = sorted(strs)
+
+    str_to_idx = collections.OrderedDict()
+    str_to_idx[sorted_strs[0]] = 0
+    previous = sorted_strs[0]
+
+    for i in range(1, len(sorted_strs)):
+        str_to_idx[sorted_strs[i]] = str_to_idx[previous] + len(previous) + 1
+        previous = sorted_strs[i]
+
+    return str_to_idx
+
+
+def output_str_table(name: str, str_to_idx):
+    c("\n")
+    c("static const char " + name + "[] = {\n")
+    c_indent(3)
+    c("\n".join(f"/* {idx} */ \"{val}\\0\"" for val, idx in str_to_idx.items()))
+    c_outdent(3)
+    c("};\n")
+
+
 register_types = {
     'FLEX': 'flex_regs',
     'NOA': 'mux_regs',
@@ -728,6 +754,30 @@ def main():
         #include "perf/intel_perf_setup.h"
         """))
 
+    names = builtins.set()
+    descs = builtins.set()
+    symbol_names = builtins.set()
+    categories = builtins.set()
+    for gen in gens:
+        for set in gen.sets:
+            for counter in set.counters:
+                names.add(counter.get('name'))
+                symbol_names.add(counter.get('symbol_name'))
+                descs.add(counter.get('description') + " " + desc_units(counter.get('units')))
+                categories.add(counter.get('mdapi_group'))
+
+    name_to_idx = str_to_idx_table(names)
+    output_str_table("name", name_to_idx)
+
+    desc_to_idx = str_to_idx_table(descs)
+    output_str_table("desc", desc_to_idx)
+
+    symbol_name_to_idx = str_to_idx_table(symbol_names)
+    output_str_table("symbol_name", symbol_name_to_idx)
+
+    category_to_idx = str_to_idx_table(categories)
+    output_str_table("category", category_to_idx)
+
     # Print out all equation functions.
     for gen in gens:
         for set in gen.sets:
@@ -747,7 +797,11 @@ def main():
                 key = counter_key(counter)
                 if key not in counter_to_idx:
                     counter_to_idx[key] = idx
-                    output_counter_struct(set, key, idx)
+                    output_counter_struct(set, key, idx,
+                                          name_to_idx,
+                                          desc_to_idx,
+                                          symbol_name_to_idx,
+                                          category_to_idx)
                     idx += 1
 
     c_outdent(3)
@@ -764,10 +818,10 @@ def main():
         {
            const struct intel_perf_query_counter_data *counter = &counters[counter_idx];
 
-           dest->name = counter->name;
-           dest->desc = counter->desc;
-           dest->symbol_name = counter->symbol_name;
-           dest->category = counter->category;
+           dest->name = &name[counter->name_idx];
+           dest->desc = &desc[counter->desc_idx];
+           dest->symbol_name = &symbol_name[counter->symbol_name_idx];
+           dest->category = &category[counter->category_idx];
            dest->raw_max = raw_max;
 
            dest->offset = offset;
diff --git a/src/intel/perf/intel_perf_setup.h b/src/intel/perf/intel_perf_setup.h
index 1f31c18c2d5..d481255d3f4 100644
--- a/src/intel/perf/intel_perf_setup.h
+++ b/src/intel/perf/intel_perf_setup.h
@@ -73,10 +73,10 @@ bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
 }
 
 struct intel_perf_query_counter_data {
-   const char *name;
-   const char *desc;
-   const char *symbol_name;
-   const char *category;
+   uint16_t name_idx;
+   uint16_t desc_idx;
+   uint16_t symbol_name_idx;
+   uint16_t category_idx;
    enum intel_perf_counter_type type;
    enum intel_perf_counter_data_type data_type;
    enum intel_perf_counter_units units;



More information about the mesa-commit mailing list