Mesa (main): intel/perf: add max vfuncs

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue May 17 20:20:08 UTC 2022


Module: Mesa
Branch: main
Commit: 66045acdf9969e1fc1becb033287b5c52342207b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=66045acdf9969e1fc1becb033287b5c52342207b

Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date:   Thu Mar 10 12:18:51 2022 +0200

intel/perf: add max vfuncs

New counters will call these max vfuncs from inside their read
functions to generate percentage numbers.

v2: Also update Iris, which was missed in v1 (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16144>

---

 src/gallium/drivers/iris/iris_monitor.c           |  27 +++--
 src/gallium/drivers/iris/iris_performance_query.c |  14 ++-
 src/intel/perf/gen_perf.py                        | 119 +++++++++++++++-------
 src/intel/perf/intel_perf.h                       |  29 ++++--
 4 files changed, 138 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/iris/iris_monitor.c b/src/gallium/drivers/iris/iris_monitor.c
index 54d2e669956..f24db9bef41 100644
--- a/src/gallium/drivers/iris/iris_monitor.c
+++ b/src/gallium/drivers/iris/iris_monitor.c
@@ -42,8 +42,8 @@ int
 iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
                       struct pipe_driver_query_info *info)
 {
-   const struct iris_screen *screen = (struct iris_screen *)pscreen;
-   const struct intel_perf_config *perf_cfg = screen->perf_cfg;
+   struct iris_screen *screen = (struct iris_screen *)pscreen;
+   struct intel_perf_config *perf_cfg = screen->perf_cfg;
    assert(perf_cfg);
    if (!perf_cfg)
       return 0;
@@ -54,7 +54,12 @@ iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
    }
 
    struct intel_perf_query_counter_info *counter_info = &perf_cfg->counter_infos[index];
+   struct intel_perf_query_info *query_info =
+      &perf_cfg->queries[intel_perf_query_counter_info_first_query(counter_info)];
    struct intel_perf_query_counter *counter = counter_info->counter;
+   struct intel_perf_query_result results;
+
+   intel_perf_query_result_clear(&results);
 
    info->group_id = counter_info->location.group_idx;
    info->name = counter->name;
@@ -66,19 +71,27 @@ iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
       info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
    switch (counter->data_type) {
    case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
-   case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
+   case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: {
       info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
-      assert(counter->raw_max <= UINT32_MAX);
-      info->max_value.u32 = (uint32_t)counter->raw_max;
+      uint64_t val =
+         counter->oa_counter_max_uint64 ?
+         counter->oa_counter_max_uint64(perf_cfg, query_info, &results) : 0;
+      assert(val <= UINT32_MAX);
+      info->max_value.u32 = (uint32_t)val;
       break;
+   }
    case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
       info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
-      info->max_value.u64 = counter->raw_max;
+      info->max_value.u64 =
+         counter->oa_counter_max_uint64 ?
+         counter->oa_counter_max_uint64(perf_cfg, query_info, &results) : 0;
       break;
    case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
    case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
       info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
-      info->max_value.f = counter->raw_max;
+      info->max_value.f =
+         counter->oa_counter_max_float ?
+         counter->oa_counter_max_float(perf_cfg, query_info, &results) : 0.0f;
       break;
    default:
       assert(false);
diff --git a/src/gallium/drivers/iris/iris_performance_query.c b/src/gallium/drivers/iris/iris_performance_query.c
index d4a1d46b308..5f19878a7fa 100644
--- a/src/gallium/drivers/iris/iris_performance_query.c
+++ b/src/gallium/drivers/iris/iris_performance_query.c
@@ -176,6 +176,9 @@ iris_get_perf_counter_info(struct pipe_context *pipe,
    const struct intel_perf_query_info *info = &perf_cfg->queries[query_index];
    const struct intel_perf_query_counter *counter =
       &info->counters[counter_index];
+   struct intel_perf_query_result results;
+
+   intel_perf_query_result_clear(&results);
 
    *name = counter->name;
    *desc = counter->desc;
@@ -183,7 +186,16 @@ iris_get_perf_counter_info(struct pipe_context *pipe,
    *data_size = intel_perf_query_counter_get_size(counter);
    *type_enum = counter->type;
    *data_type_enum = counter->data_type;
-   *raw_max = counter->raw_max;
+
+   if (counter->oa_counter_max_uint64) {
+      if (counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_FLOAT ||
+          counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE)
+         *raw_max = counter->oa_counter_max_float(perf_cfg, info, &results);
+      else
+         *raw_max = counter->oa_counter_max_uint64(perf_cfg, info, &results);
+   } else {
+      *raw_max = 0;
+   }
 }
 
 static void
diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py
index 36b81da50f6..5ff0245ac22 100644
--- a/src/intel/perf/gen_perf.py
+++ b/src/intel/perf/gen_perf.py
@@ -355,14 +355,14 @@ def output_counter_read(gen, set, counter):
 def output_counter_max(gen, set, counter):
     max_eq = counter.get('max_equation')
 
-    if not counter.has_max_func():
+    if not counter.has_custom_max_func():
         return
 
     c("\n")
     c("/* {0} :: {1} */".format(set.name, counter.get('name')))
 
     if counter.max_hash in hashed_funcs:
-        c("#define %s \\" % counter.max_sym())
+        c("#define %s \\" % counter.max_sym)
         c_indent(3)
         c("%s" % hashed_funcs[counter.max_hash])
         c_outdent(3)
@@ -372,14 +372,18 @@ def output_counter_max(gen, set, counter):
             ret_type = "uint64_t"
 
         c("static " + ret_type)
-        c(counter.max_sym() + "(struct intel_perf_config *perf)\n")
+        c(counter.max_sym + "(struct intel_perf_config *perf,\n")
+        c_indent(len(counter.read_sym) + 1)
+        c("const struct intel_perf_query_info *query,\n")
+        c("const struct intel_perf_query_result *results)\n")
+        c_outdent(len(counter.read_sym) + 1)
         c("{")
         c_indent(3)
         output_rpn_equation_code(set, counter, max_eq)
         c_outdent(3)
         c("}")
 
-        hashed_funcs[counter.max_hash] = counter.max_sym()
+        hashed_funcs[counter.max_hash] = counter.max_sym
 
 
 c_type_sizes = { "uint32_t": 4, "uint64_t": 8, "float": 4, "double": 8, "bool": 4 }
@@ -515,10 +519,17 @@ def output_counter_report(set, counter, counter_to_idx, current_offset):
 
     current_offset = pot_align(current_offset, sizeof(c_type))
 
-    c("intel_perf_query_add_counter(query, " + idx + ", " +
-        str(current_offset) + ", " +
-        set.max_values[counter.get('symbol_name')] + ", (oa_counter_read_func)" +
-        set.read_funcs[counter.get('symbol_name')] + ");\n")
+    if data_type == 'uint64':
+        c("intel_perf_query_add_counter_uint64(query, " + idx + ", " +
+          str(current_offset) + ", " +
+          set.max_funcs[counter.get('symbol_name')] + "," +
+          set.read_funcs[counter.get('symbol_name')] + ");\n")
+    else:
+        c("intel_perf_query_add_counter_float(query, " + idx + ", " +
+          str(current_offset) + ", " +
+          set.max_funcs[counter.get('symbol_name')] + "," +
+          set.read_funcs[counter.get('symbol_name')] + ");\n")
+
 
     if availability:
         c_outdent(3);
@@ -607,6 +618,7 @@ class Counter:
         self.read_sym = "{0}__{1}__{2}__read".format(self.set.gen.chipset,
                                                      self.set.underscore_name,
                                                      self.xml.get('underscore_name'))
+        self.max_sym = self.build_max_sym()
 
     def get(self, prop):
         return self.xml.get(prop)
@@ -632,45 +644,44 @@ class Counter:
         if max_eq:
             self.max_hash = ' '.join(map(replace_token, max_eq.split()))
 
-    def has_max_func(self):
+    def has_custom_max_func(self):
         max_eq = self.xml.get('max_equation')
         if not max_eq:
             return False
 
         try:
             val = float(max_eq)
-            return False
+            if val == 100:
+                return False
         except ValueError:
             pass
 
         for token in max_eq.split():
-            if token[0] == '$' and resolve_variable(token, self.set, False) == None:
+            if token[0] == '$' and resolve_variable(token, self.set, True) == None:
+                print("unresolved token " + token)
                 return False
         return True
 
-    def max_sym(self):
-        assert self.has_max_func()
-        return "{0}__{1}__{2}__max".format(self.set.gen.chipset,
-                                           self.set.underscore_name,
-                                           self.xml.get('underscore_name'))
-
-    def max_value(self):
+    def build_max_sym(self):
         max_eq = self.xml.get('max_equation')
         if not max_eq:
-            return "0 /* undefined */"
+            return "NULL"
 
         try:
-            return "{0}".format(float(max_eq))
+            val = float(max_eq)
+            if val == 100:
+                if self.xml.get('data_type') == 'uint64':
+                    return "percentage_max_uint64"
+                else:
+                    return "percentage_max_float"
         except ValueError:
             pass
 
-        for token in max_eq.split():
-            if token[0] == '$' and resolve_variable(token, self.set, False) == None:
-                return "0 /* unsupported (varies over time) */"
+        assert self.has_custom_max_func()
+        return "{0}__{1}__{2}__max".format(self.set.gen.chipset,
+                                           self.set.underscore_name,
+                                           self.xml.get('underscore_name'))
 
-        return "{0}__{1}__{2}__max(perf)".format(self.set.gen.chipset,
-                                                 self.set.underscore_name,
-                                                 self.xml.get('underscore_name'))
 
 # Wraps a <set> element from the oa-*.xml files.
 class Set:
@@ -679,7 +690,7 @@ class Set:
         self.xml = xml
 
         self.counter_vars = {}
-        self.max_values = {}
+        self.max_funcs = {}
         self.read_funcs = {}
 
         xml_counters = self.xml.findall("counter")
@@ -689,7 +700,7 @@ class Set:
             self.counters.append(counter)
             self.counter_vars['$' + counter.get('symbol_name')] = counter
             self.read_funcs[counter.get('symbol_name')] = counter.read_sym
-            self.max_values[counter.get('symbol_name')] = counter.max_value()
+            self.max_funcs[counter.get('symbol_name')] = counter.max_sym
 
         for counter in self.counters:
             counter.compute_hashes()
@@ -856,13 +867,11 @@ def main():
     c("};\n\n")
 
     c(textwrap.dedent("""\
-        typedef uint64_t (*oa_counter_read_func)(struct intel_perf_config *perf,
-                                                 const struct intel_perf_query_info *query,
-                                                 const struct intel_perf_query_result *results);
         static void ATTRIBUTE_NOINLINE
-        intel_perf_query_add_counter(struct intel_perf_query_info *query,
-                                     int counter_idx, size_t offset,
-                                     uint64_t raw_max, oa_counter_read_func oa_counter_read_uint64)
+        intel_perf_query_add_counter_uint64(struct intel_perf_query_info *query,
+                                            int counter_idx, size_t offset,
+                                            intel_counter_read_uint64_t oa_counter_max,
+                                            intel_counter_read_uint64_t oa_counter_read)
         {
            struct intel_perf_query_counter *dest = &query->counters[query->n_counters++];
            const struct intel_perf_query_counter_data *counter = &counters[counter_idx];
@@ -871,13 +880,51 @@ def main():
            dest->desc = &desc[counter->desc_idx];
            dest->symbol_name = &symbol_name[counter->symbol_name_idx];
            dest->category = &category[counter->category_idx];
-           dest->raw_max = raw_max;
 
            dest->offset = offset;
            dest->type = counter->type;
            dest->data_type = counter->data_type;
            dest->units = counter->units;
-           dest->oa_counter_read_uint64 = oa_counter_read_uint64;
+           dest->oa_counter_max_uint64 = oa_counter_max;
+           dest->oa_counter_read_uint64 = oa_counter_read;
+        }
+
+        static void ATTRIBUTE_NOINLINE
+        intel_perf_query_add_counter_float(struct intel_perf_query_info *query,
+                                           int counter_idx, size_t offset,
+                                           intel_counter_read_float_t oa_counter_max,
+                                           intel_counter_read_float_t oa_counter_read)
+        {
+           struct intel_perf_query_counter *dest = &query->counters[query->n_counters++];
+           const struct intel_perf_query_counter_data *counter = &counters[counter_idx];
+
+           dest->name = &name[counter->name_idx];
+           dest->desc = &desc[counter->desc_idx];
+           dest->symbol_name = &symbol_name[counter->symbol_name_idx];
+           dest->category = &category[counter->category_idx];
+
+           dest->offset = offset;
+           dest->type = counter->type;
+           dest->data_type = counter->data_type;
+           dest->units = counter->units;
+           dest->oa_counter_max_float = oa_counter_max;
+           dest->oa_counter_read_float = oa_counter_read;
+        }
+
+        static float ATTRIBUTE_NOINLINE
+        percentage_max_float(struct intel_perf_config *perf,
+                             const struct intel_perf_query_info *query,
+                             const struct intel_perf_query_result *results)
+        {
+           return 100;
+        }
+
+        static uint64_t ATTRIBUTE_NOINLINE
+        percentage_max_uint64(struct intel_perf_config *perf,
+                              const struct intel_perf_query_info *query,
+                              const struct intel_perf_query_result *results)
+        {
+           return 100;
         }
         """))
 
diff --git a/src/intel/perf/intel_perf.h b/src/intel/perf/intel_perf.h
index 3b0825e4b47..6f26041c443 100644
--- a/src/intel/perf/intel_perf.h
+++ b/src/intel/perf/intel_perf.h
@@ -37,6 +37,7 @@
 
 #include "compiler/glsl/list.h"
 #include "dev/intel_device_info.h"
+#include "util/bitscan.h"
 #include "util/hash_table.h"
 #include "util/ralloc.h"
 
@@ -178,6 +179,14 @@ struct intel_perf_query_result {
    bool query_disjoint;
 };
 
+typedef uint64_t (*intel_counter_read_uint64_t)(struct intel_perf_config *perf,
+                                                const struct intel_perf_query_info *query,
+                                                const struct intel_perf_query_result *results);
+
+typedef float (*intel_counter_read_float_t)(struct intel_perf_config *perf,
+                                            const struct intel_perf_query_info *query,
+                                            const struct intel_perf_query_result *results);
+
 struct intel_perf_query_counter {
    const char *name;
    const char *desc;
@@ -186,16 +195,16 @@ struct intel_perf_query_counter {
    enum intel_perf_counter_type type;
    enum intel_perf_counter_data_type data_type;
    enum intel_perf_counter_units units;
-   uint64_t raw_max;
    size_t offset;
 
    union {
-      uint64_t (*oa_counter_read_uint64)(struct intel_perf_config *perf,
-                                         const struct intel_perf_query_info *query,
-                                         const struct intel_perf_query_result *results);
-      float (*oa_counter_read_float)(struct intel_perf_config *perf,
-                                     const struct intel_perf_query_info *query,
-                                     const struct intel_perf_query_result *results);
+      intel_counter_read_uint64_t oa_counter_max_uint64;
+      intel_counter_read_float_t  oa_counter_max_float;
+   };
+
+   union {
+      intel_counter_read_uint64_t oa_counter_read_uint64;
+      intel_counter_read_float_t  oa_counter_read_float;
       struct intel_pipeline_stat pipeline_stat;
    };
 };
@@ -429,6 +438,12 @@ uint64_t intel_perf_store_configuration(struct intel_perf_config *perf_cfg, int
                                         const struct intel_perf_registers *config,
                                         const char *guid);
 
+static inline unsigned
+intel_perf_query_counter_info_first_query(const struct intel_perf_query_counter_info *counter_info)
+{
+   return ffsll(counter_info->query_mask);
+}
+
 /** Read the slice/unslice frequency from 2 OA reports and store then into
  *  result.
  */



More information about the mesa-commit mailing list