Mesa (master): intel/perf: move gt_frequency to results

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jan 15 13:04:39 UTC 2021


Module: Mesa
Branch: master
Commit: 9a54aa131e958a890080036bbeb1e17a469b7cfe
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9a54aa131e958a890080036bbeb1e17a469b7cfe

Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date:   Tue Sep  8 14:33:12 2020 +0300

intel/perf: move gt_frequency to results

We want to unify things a bit between GL & Vulkan, so store the GT
frequency values in the query results rather than just in the GL query code.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Marcin Ślusarz <marcin.slusarz at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8525>

---

 src/intel/perf/gen_perf.c       | 29 ++++++++++++++++++++++++++++
 src/intel/perf/gen_perf.h       | 13 +++++++++++++
 src/intel/perf/gen_perf_mdapi.c | 16 ++++++++--------
 src/intel/perf/gen_perf_mdapi.h |  4 ++--
 src/intel/perf/gen_perf_query.c | 42 ++---------------------------------------
 src/intel/vulkan/genX_query.c   | 17 +++++------------
 6 files changed, 59 insertions(+), 62 deletions(-)

diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c
index 0b88fb2c594..4530bb02e33 100644
--- a/src/intel/perf/gen_perf.c
+++ b/src/intel/perf/gen_perf.c
@@ -1088,6 +1088,35 @@ gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
 
 }
 
+#define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
+
+void
+gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
+                                        const struct gen_device_info *devinfo,
+                                        const uint32_t start,
+                                        const uint32_t end)
+{
+   switch (devinfo->gen) {
+   case 7:
+   case 8:
+      result->gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
+      result->gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
+      break;
+   case 9:
+   case 11:
+   case 12:
+      result->gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
+      result->gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
+      break;
+   default:
+      unreachable("unexpected gen");
+   }
+
+   /* Put the numbers into Hz. */
+   result->gt_frequency[0] *= 1000000ULL;
+   result->gt_frequency[1] *= 1000000ULL;
+}
+
 void
 gen_perf_query_result_clear(struct gen_perf_query_result *result)
 {
diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h
index c91f9eeb55c..4348c731109 100644
--- a/src/intel/perf/gen_perf.h
+++ b/src/intel/perf/gen_perf.h
@@ -150,6 +150,11 @@ struct gen_perf_query_result {
     */
    uint64_t unslice_frequency[2];
 
+   /**
+    * Frequency of the whole GT at the beginning and end of the query.
+    */
+   uint64_t gt_frequency[2];
+
    /**
     * Timestamp of the query.
     */
@@ -357,6 +362,14 @@ void gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result
                                             const struct gen_device_info *devinfo,
                                             const uint32_t *start,
                                             const uint32_t *end);
+
+/** Store the GT frequency as reported by the RPSTAT register.
+ */
+void gen_perf_query_result_read_gt_frequency(struct gen_perf_query_result *result,
+                                             const struct gen_device_info *devinfo,
+                                             const uint32_t start,
+                                             const uint32_t end);
+
 /** Accumulate the delta between 2 OA reports into result for a given query.
  */
 void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
diff --git a/src/intel/perf/gen_perf_mdapi.c b/src/intel/perf/gen_perf_mdapi.c
index 19f1be28bbc..2452b99f59f 100644
--- a/src/intel/perf/gen_perf_mdapi.c
+++ b/src/intel/perf/gen_perf_mdapi.c
@@ -34,8 +34,8 @@
 int
 gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
                                   const struct gen_device_info *devinfo,
-                                  const struct gen_perf_query_result *result,
-                                  uint64_t freq_start, uint64_t freq_end)
+                                  const struct gen_perf_query_info *query,
+                                  const struct gen_perf_query_result *result)
 {
    switch (devinfo->gen) {
    case 7: {
@@ -57,8 +57,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
       mdapi_data->ReportsCount = result->reports_accumulated;
       mdapi_data->TotalTime =
          gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
-      mdapi_data->CoreFrequency = freq_end;
-      mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
+      mdapi_data->CoreFrequency = result->gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
       mdapi_data->SplitOccured = result->query_disjoint;
       return sizeof(*mdapi_data);
    }
@@ -82,8 +82,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
       mdapi_data->BeginTimestamp =
          gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
       mdapi_data->GPUTicks = result->accumulator[1];
-      mdapi_data->CoreFrequency = freq_end;
-      mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
+      mdapi_data->CoreFrequency = result->gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
       mdapi_data->SliceFrequency =
          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
       mdapi_data->UnsliceFrequency =
@@ -113,8 +113,8 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
       mdapi_data->BeginTimestamp =
          gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
       mdapi_data->GPUTicks = result->accumulator[1];
-      mdapi_data->CoreFrequency = freq_end;
-      mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
+      mdapi_data->CoreFrequency = result->gt_frequency[1];
+      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
       mdapi_data->SliceFrequency =
          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
       mdapi_data->UnsliceFrequency =
diff --git a/src/intel/perf/gen_perf_mdapi.h b/src/intel/perf/gen_perf_mdapi.h
index 8be8d2033ac..acf1edd6e79 100644
--- a/src/intel/perf/gen_perf_mdapi.h
+++ b/src/intel/perf/gen_perf_mdapi.h
@@ -129,8 +129,8 @@ struct mdapi_pipeline_metrics {
 
 int gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
                                       const struct gen_device_info *devinfo,
-                                      const struct gen_perf_query_result *result,
-                                      uint64_t freq_start, uint64_t freq_end);
+                                      const struct gen_perf_query_info *query,
+                                      const struct gen_perf_query_result *result);
 
 static inline void gen_perf_query_mdapi_write_perfcntr(void *data, uint32_t data_size,
                                                        const struct gen_device_info *devinfo,
diff --git a/src/intel/perf/gen_perf_query.c b/src/intel/perf/gen_perf_query.c
index 288f261f55a..e6d38b6bb72 100644
--- a/src/intel/perf/gen_perf_query.c
+++ b/src/intel/perf/gen_perf_query.c
@@ -218,11 +218,6 @@ struct gen_perf_query_object
           */
          bool results_accumulated;
 
-         /**
-          * Frequency of the GT at begin and end of the query.
-          */
-         uint64_t gt_frequency[2];
-
          /**
           * Accumulated OA results between begin and end of the query.
           */
@@ -1405,37 +1400,6 @@ gen_perf_delete_query(struct gen_perf_context *perf_ctx,
    free(query);
 }
 
-#define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
-
-static void
-read_gt_frequency(struct gen_perf_context *perf_ctx,
-                  struct gen_perf_query_object *obj)
-{
-   const struct gen_device_info *devinfo = perf_ctx->devinfo;
-   uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)),
-      end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
-
-   switch (devinfo->gen) {
-   case 7:
-   case 8:
-      obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
-      obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
-      break;
-   case 9:
-   case 11:
-   case 12:
-      obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
-      obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
-      break;
-   default:
-      unreachable("unexpected gen");
-   }
-
-   /* Put the numbers into Hz. */
-   obj->oa.gt_frequency[0] *= 1000000ULL;
-   obj->oa.gt_frequency[1] *= 1000000ULL;
-}
-
 static int
 get_oa_counter_data(struct gen_perf_context *perf_ctx,
                     struct gen_perf_query_object *query,
@@ -1540,7 +1504,6 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
          while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
             ;
 
-         read_gt_frequency(perf_ctx, query);
          uint32_t *begin_report = query->oa.map;
          uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
          gen_perf_query_result_read_frequencies(&query->oa.result,
@@ -1559,9 +1522,8 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
          const struct gen_device_info *devinfo = perf_ctx->devinfo;
 
          written = gen_perf_query_result_write_mdapi((uint8_t *)data, data_size,
-                                                     devinfo, &query->oa.result,
-                                                     query->oa.gt_frequency[0],
-                                                     query->oa.gt_frequency[1]);
+                                                     devinfo, query->queryinfo,
+                                                     &query->oa.result);
       }
       break;
 
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 3fd662cc062..5994488960d 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -528,29 +528,22 @@ VkResult genX(GetQueryPoolResults)(
       case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
          if (!write_results)
             break;
+         const struct gen_perf_query_info *query = &device->physical->perf->queries[0];
          const void *query_data = query_slot(pool, firstQuery + i);
          const uint32_t *oa_begin = query_data + intel_perf_mi_rpc_offset(false);
          const uint32_t *oa_end = query_data + intel_perf_mi_rpc_offset(true);
          const uint32_t *rpstat_begin = query_data + intel_perf_rpstart_offset(false);
          const uint32_t *rpstat_end = query_data + intel_perf_mi_rpc_offset(true);
          struct gen_perf_query_result result;
-         uint32_t core_freq[2];
-#if GEN_GEN < 9
-         core_freq[0] = ((*rpstat_begin >> 7) & 0x7f) * 1000000ULL;
-         core_freq[1] = ((*rpstat_end >> 7) & 0x7f) * 1000000ULL;
-#else
-         core_freq[0] = ((*rpstat_begin >> 23) & 0x1ff) * 1000000ULL;
-         core_freq[1] = ((*rpstat_end >> 23) & 0x1ff) * 1000000ULL;
-#endif
          gen_perf_query_result_clear(&result);
-         gen_perf_query_result_accumulate(&result, &device->physical->perf->queries[0],
-                                          oa_begin, oa_end);
+         gen_perf_query_result_accumulate(&result, query, oa_begin, oa_end);
          gen_perf_query_result_read_frequencies(&result, &device->info,
                                                 oa_begin, oa_end);
+         gen_perf_query_result_read_gt_frequency(&result, &device->info,
+                                                 *rpstat_begin, *rpstat_end);
          gen_perf_query_result_write_mdapi(pData, stride,
                                            &device->info,
-                                           &result,
-                                           core_freq[0], core_freq[1]);
+                                           query, &result);
 #if GEN_GEN >= 8 && GEN_GEN <= 11
          gen_perf_query_mdapi_write_perfcntr(pData, stride, &device->info,
                                              query_data + intel_perf_counter(false),



More information about the mesa-commit mailing list