[Mesa-dev] [PATCH 05/19] i965: move OA accumulation code to intel/perf

Lionel Landwerlin lionel.g.landwerlin at intel.com
Mon Jun 18 17:39:26 UTC 2018


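Move the OA counter accumulation helpers, the slice/unslice frequency
decoding and the accumulated result structure out of the i965 driver
and into src/intel/perf, so that they can be reused by our Vulkan
extension.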

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 src/intel/perf/gen_perf.c                     | 135 ++++++++++++++++++
 src/intel/perf/gen_perf.h                     |  81 ++++++-----
 .../drivers/dri/i965/brw_performance_query.c  | 134 +++--------------
 .../drivers/dri/i965/brw_performance_query.h  |  37 +----
 .../dri/i965/brw_performance_query_mdapi.c    |  41 +++---
 5 files changed, 229 insertions(+), 199 deletions(-)

diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c
index 3a8819e6ba9..979195f3d86 100644
--- a/src/intel/perf/gen_perf.c
+++ b/src/intel/perf/gen_perf.c
@@ -433,3 +433,138 @@ gen_perf_load_oa_metrics(struct gen_perf *perf, int fd,
 
    return true;
 }
+
+/* Accumulate 32-bit OA counters */
+static inline void
+accumulate_uint32(const uint32_t *report0,
+                  const uint32_t *report1,
+                  uint64_t *accumulator)
+{
+   *accumulator += (uint32_t)(*report1 - *report0);
+}
+
+/* Accumulate 40-bit OA counters */
+static inline void
+accumulate_uint40(int a_index,
+                  const uint32_t *report0,
+                  const uint32_t *report1,
+                  uint64_t *accumulator)
+{
+   const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40);
+   const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40);
+   uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32;
+   uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32;
+   uint64_t value0 = report0[a_index + 4] | high0;
+   uint64_t value1 = report1[a_index + 4] | high1;
+   uint64_t delta;
+
+   if (value0 > value1)
+      delta = (1ULL << 40) + value1 - value0;
+   else
+      delta = value1 - value0;
+
+   *accumulator += delta;
+}
+
+static void
+gen8_read_report_clock_ratios(const uint32_t *report,
+                              uint64_t *slice_freq_hz,
+                              uint64_t *unslice_freq_hz)
+{
+   /* The RPT_ID field of the OA reports (dword 0) contains a snapshot
+    * of bits coming from the RP_FREQ_NORMAL register, laid out as
+    * follows:
+    *
+    * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency)
+    * RPT_ID[10:9]:  RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency)
+    * RPT_ID[8:0]:   RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency)
+    *
+    * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request
+    *                        Multiple of 33.33MHz 2xclk (16.67 MHz 1xclk)
+    *
+    * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request
+    *                        Multiple of 33.33MHz 2xclk (16.67 MHz 1xclk)
+    */
+
+   uint32_t unslice_freq = report[0] & 0x1ff;
+   uint32_t slice_freq_low = (report[0] >> 25) & 0x7f;
+   uint32_t slice_freq_high = (report[0] >> 9) & 0x3;
+   uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7);
+
+   *slice_freq_hz = slice_freq * 16666667ULL;
+   *unslice_freq_hz = unslice_freq * 16666667ULL;
+}
+
+void
+gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
+                                       const struct gen_device_info *devinfo,
+                                       const uint32_t *start,
+                                       const uint32_t *end)
+{
+   /* Slice/Unslice frequency is only available in the OA reports when the
+    * "Disable OA reports due to clock ratio change" field in
+    * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this
+    * global register (see drivers/gpu/drm/i915/i915_perf.c)
+    *
+    * Documentation says this should be available on Gen9+ but experimentation
+    * shows that Gen8 reports similar values, so we enable it there too.
+    */
+   if (devinfo->gen < 8)
+      return;
+
+   gen8_read_report_clock_ratios(start,
+                                 &result->slice_frequency[0],
+                                 &result->unslice_frequency[0]);
+   gen8_read_report_clock_ratios(end,
+                                 &result->slice_frequency[1],
+                                 &result->unslice_frequency[1]);
+}
+
+void
+gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
+                                 const struct gen_perf_query_info *query,
+                                 const uint32_t *start,
+                                 const uint32_t *end)
+{
+   int i, idx = 0;
+
+   result->hw_id = start[2];
+   result->reports_accumulated++;
+
+   switch (query->oa_format) {
+   case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
+      accumulate_uint32(start + 1, end + 1, result->accumulator + idx++); /* timestamp */
+      accumulate_uint32(start + 3, end + 3, result->accumulator + idx++); /* clock */
+
+      /* 32x 40bit A counters... */
+      for (i = 0; i < 32; i++)
+         accumulate_uint40(i, start, end, result->accumulator + idx++);
+
+      /* 4x 32bit A counters... */
+      for (i = 0; i < 4; i++)
+         accumulate_uint32(start + 36 + i, end + 36 + i, result->accumulator + idx++);
+
+      /* 8x 32bit B counters + 8x 32bit C counters... */
+      for (i = 0; i < 16; i++)
+         accumulate_uint32(start + 48 + i, end + 48 + i, result->accumulator + idx++);
+      break;
+
+   case I915_OA_FORMAT_A45_B8_C8:
+      accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */
+
+      for (i = 0; i < 61; i++)
+         accumulate_uint32(start + 3 + i, end + 3 + i, result->accumulator + 1 + i);
+      break;
+
+   default:
+      unreachable("Can't accumulate OA counters in unknown format");
+   }
+
+}
+
+void
+gen_perf_query_result_clear(struct gen_perf_query_result *result)
+{
+   memset(result, 0, sizeof(*result));
+   result->hw_id = 0xffffffff; /* invalid */
+}
diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h
index 5d47ebd2925..24fbd15cac1 100644
--- a/src/intel/perf/gen_perf.h
+++ b/src/intel/perf/gen_perf.h
@@ -61,6 +61,44 @@ struct gen_pipeline_stat {
    uint32_t denominator;
 };
 
+/*
+ * The largest OA formats we can use include:
+ * For Haswell:
+ *   1 timestamp, 45 A counters, 8 B counters and 8 C counters.
+ * For Gen8+:
+ *   1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
+ */
+#define MAX_OA_REPORT_COUNTERS 62
+
+struct gen_perf_query_result {
+   /**
+    * Storage for the final accumulated OA counters.
+    */
+   uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
+
+   /**
+    * Hw ID used by the context on which the query was running.
+    */
+   uint32_t hw_id;
+
+   /**
+    * Number of reports accumulated to produce the results.
+    */
+   uint32_t reports_accumulated;
+
+   /**
+    * Frequency in the slices of the GT at the beginning and end of the
+    * query.
+    */
+   uint64_t slice_frequency[2];
+
+   /**
+    * Frequency in the unslice of the GT at the beginning and end of the
+    * query.
+    */
+   uint64_t unslice_frequency[2];
+};
+
 struct gen_perf_query_counter {
    const char *name;
    const char *desc;
@@ -208,38 +246,6 @@ gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query,
    gen_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name);
 }
 
-/* Accumulate 32bits OA counters */
-static inline void
-gen_perf_query_accumulate_uint32(const uint32_t *report0,
-                                 const uint32_t *report1,
-                                 uint64_t *accumulator)
-{
-   *accumulator += (uint32_t)(*report1 - *report0);
-}
-
-/* Accumulate 40bits OA counters */
-static inline void
-gen_perf_query_accumulate_uint40(int a_index,
-                                 const uint32_t *report0,
-                                 const uint32_t *report1,
-                                 uint64_t *accumulator)
-{
-   const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40);
-   const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40);
-   uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32;
-   uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32;
-   uint64_t value0 = report0[a_index + 4] | high0;
-   uint64_t value1 = report1[a_index + 4] | high1;
-   uint64_t delta;
-
-   if (value0 > value1)
-      delta = (1ULL << 40) + value1 - value0;
-   else
-      delta = value1 - value0;
-
-   *accumulator += delta;
-}
-
 static inline struct gen_perf *
 gen_perf_new(void *ctx, int (*ioctl_cb)(int, unsigned long, void *))
 {
@@ -255,4 +261,15 @@ bool gen_perf_load_oa_metrics(struct gen_perf *perf, int fd,
 bool gen_perf_load_metric_id(struct gen_perf *perf, const char *guid,
                              uint64_t *metric_id);
 
+void gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
+                                            const struct gen_device_info *devinfo,
+                                            const uint32_t *start,
+                                            const uint32_t *end);
+void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
+                                      const struct gen_perf_query_info *query,
+                                      const uint32_t *start,
+                                      const uint32_t *end);
+void gen_perf_query_result_clear(struct gen_perf_query_result *result);
+
+
 #endif /* GEN_PERF_H */
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
index fe79651a28d..8dae7721860 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -541,55 +541,6 @@ drop_from_unaccumulated_query_list(struct brw_context *brw,
    reap_old_sample_buffers(brw);
 }
 
-/**
- * Given pointers to starting and ending OA snapshots, add the deltas for each
- * counter to the results.
- */
-static void
-add_deltas(struct brw_context *brw,
-           struct brw_perf_query_object *obj,
-           const uint32_t *start,
-           const uint32_t *end)
-{
-   const struct gen_perf_query_info *query = obj->query;
-   uint64_t *accumulator = obj->oa.accumulator;
-   int idx = 0;
-   int i;
-
-   obj->oa.reports_accumulated++;
-
-   switch (query->oa_format) {
-   case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
-      gen_perf_query_accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */
-      gen_perf_query_accumulate_uint32(start + 3, end + 3, accumulator + idx++); /* clock */
-
-      /* 32x 40bit A counters... */
-      for (i = 0; i < 32; i++)
-         gen_perf_query_accumulate_uint40(i, start, end, accumulator + idx++);
-
-      /* 4x 32bit A counters... */
-      for (i = 0; i < 4; i++)
-         gen_perf_query_accumulate_uint32(start + 36 + i, end + 36 + i,
-                                          accumulator + idx++);
-
-      /* 8x 32bit B counters + 8x 32bit C counters... */
-      for (i = 0; i < 16; i++)
-         gen_perf_query_accumulate_uint32(start + 48 + i, end + 48 + i,
-                                          accumulator + idx++);
-
-      break;
-   case I915_OA_FORMAT_A45_B8_C8:
-      gen_perf_query_accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */
-
-      for (i = 0; i < 61; i++)
-         gen_perf_query_accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i);
-
-      break;
-   default:
-      unreachable("Can't accumulate OA counters in unknown format");
-   }
-}
-
 static bool
 inc_n_oa_users(struct brw_context *brw)
 {
@@ -800,8 +751,6 @@ accumulate_oa_reports(struct brw_context *brw,
       goto error;
    }
 
-   obj->oa.hw_id = start[2];
-
    /* See if we have any periodic reports to accumulate too... */
 
    /* N.B. The oa.samples_head was set when the query began and
@@ -855,11 +804,11 @@ accumulate_oa_reports(struct brw_context *brw,
              * of OA counters while any other context is acctive.
              */
             if (devinfo->gen >= 8) {
-               if (in_ctx && report[2] != obj->oa.hw_id) {
+               if (in_ctx && report[2] != obj->oa.result.hw_id) {
                   DBG("i915 perf: Switch AWAY (observed by ID change)\n");
                   in_ctx = false;
                   out_duration = 0;
-               } else if (in_ctx == false && report[2] == obj->oa.hw_id) {
+               } else if (in_ctx == false && report[2] == obj->oa.result.hw_id) {
                   DBG("i915 perf: Switch TO\n");
                   in_ctx = true;
 
@@ -876,18 +825,20 @@ accumulate_oa_reports(struct brw_context *brw,
                   if (out_duration >= 1)
                      add = false;
                } else if (in_ctx) {
-                  assert(report[2] == obj->oa.hw_id);
+                  assert(report[2] == obj->oa.result.hw_id);
                   DBG("i915 perf: Continuation IN\n");
                } else {
-                  assert(report[2] != obj->oa.hw_id);
+                  assert(report[2] != obj->oa.result.hw_id);
                   DBG("i915 perf: Continuation OUT\n");
                   add = false;
                   out_duration++;
                }
             }
 
-            if (add)
-               add_deltas(brw, obj, last, report);
+            if (add) {
+               gen_perf_query_result_accumulate(&obj->oa.result, obj->query,
+                                                last, report);
+            }
 
             last = report;
 
@@ -906,7 +857,8 @@ accumulate_oa_reports(struct brw_context *brw,
 
 end:
 
-   add_deltas(brw, obj, last, end);
+   gen_perf_query_result_accumulate(&obj->oa.result, obj->query,
+                                    last, end);
 
    DBG("Marking %d accumulated - results gathered\n", o->Id);
 
@@ -1210,8 +1162,7 @@ brw_begin_perf_query(struct gl_context *ctx,
        */
       buf->refcount++;
 
-      obj->oa.hw_id = 0xffffffff;
-      memset(obj->oa.accumulator, 0, sizeof(obj->oa.accumulator));
+      gen_perf_query_result_clear(&obj->oa.result);
       obj->oa.results_accumulated = false;
 
       add_to_unaccumulated_query_list(brw, obj);
@@ -1380,62 +1331,15 @@ brw_is_perf_query_ready(struct gl_context *ctx,
    return false;
 }
 
-static void
-gen8_read_report_clock_ratios(const uint32_t *report,
-                              uint64_t *slice_freq_hz,
-                              uint64_t *unslice_freq_hz)
-{
-   /* The lower 16bits of the RPT_ID field of the OA reports contains a
-    * snapshot of the bits coming from the RP_FREQ_NORMAL register and is
-    * divided this way :
-    *
-    * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency)
-    * RPT_ID[10:9]:  RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency)
-    * RPT_ID[8:0]:   RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency)
-    *
-    * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request
-    *                        Multiple of 33.33MHz 2xclk (16 MHz 1xclk)
-    *
-    * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request
-    *                        Multiple of 33.33MHz 2xclk (16 MHz 1xclk)
-    */
-
-   uint32_t unslice_freq = report[0] & 0x1ff;
-   uint32_t slice_freq_low = (report[0] >> 25) & 0x7f;
-   uint32_t slice_freq_high = (report[0] >> 9) & 0x3;
-   uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7);
-
-   *slice_freq_hz = slice_freq * 16666667ULL;
-   *unslice_freq_hz = unslice_freq * 16666667ULL;
-}
-
 static void
 read_slice_unslice_frequencies(struct brw_context *brw,
                                struct brw_perf_query_object *obj)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   uint32_t *begin_report, *end_report;
-
-   /* Slice/Unslice frequency is only available in the OA reports when the
-    * "Disable OA reports due to clock ratio change" field in
-    * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this
-    * global register (see drivers/gpu/drm/i915/i915_perf.c)
-    *
-    * Documentation says this should be available on Gen9+ but experimentation
-    * shows that Gen8 reports similar values, so we enable it there too.
-    */
-   if (devinfo->gen < 8)
-      return;
-
-   begin_report = obj->oa.map;
-   end_report = obj->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
+   uint32_t *begin_report = obj->oa.map, *end_report = obj->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
 
-   gen8_read_report_clock_ratios(begin_report,
-                                 &obj->oa.slice_frequency[0],
-                                 &obj->oa.unslice_frequency[0]);
-   gen8_read_report_clock_ratios(end_report,
-                                 &obj->oa.slice_frequency[1],
-                                 &obj->oa.unslice_frequency[1]);
+   gen_perf_query_result_read_frequencies(&obj->oa.result,
+                                          devinfo, begin_report, end_report);
 }
 
 static void
@@ -1487,13 +1391,15 @@ get_oa_counter_data(struct brw_context *brw,
          switch (counter->data_type) {
          case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
             out_uint64 = (uint64_t *)(data + counter->offset);
-            *out_uint64 = counter->oa_counter_read_uint64(perf, query,
-                                                          obj->oa.accumulator);
+            *out_uint64 =
+               counter->oa_counter_read_uint64(perf, query,
+                                               obj->oa.result.accumulator);
             break;
          case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
             out_float = (float *)(data + counter->offset);
-            *out_float = counter->oa_counter_read_float(perf, query,
-                                                        obj->oa.accumulator);
+            *out_float =
+               counter->oa_counter_read_float(perf, query,
+                                              obj->oa.result.accumulator);
             break;
          default:
             /* So far we aren't using uint32, double or bool32... */
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h
index ca0503422ca..86632e06a61 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.h
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.h
@@ -28,6 +28,8 @@
 
 #include "brw_context.h"
 
+#include "perf/gen_perf.h"
+
 struct gen_perf_query_info;
 
 /*
@@ -38,15 +40,6 @@ struct gen_perf_query_info;
 #define STATS_BO_END_OFFSET_BYTES   (STATS_BO_SIZE / 2)
 #define MAX_STAT_COUNTERS           (STATS_BO_END_OFFSET_BYTES / 8)
 
-/*
- * The largest OA formats we can use include:
- * For Haswell:
- *   1 timestamp, 45 A counters, 8 B counters and 8 C counters.
- * For Gen8+
- *   1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
- */
-#define MAX_OA_REPORT_COUNTERS 62
-
 /**
  * i965 representation of a performance query object.
  *
@@ -93,16 +86,6 @@ struct brw_perf_query_object
           */
          struct exec_node *samples_head;
 
-         /**
-          * Storage for the final accumulated OA counters.
-          */
-         uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
-
-         /**
-          * Hw ID used by the context on which the query was running.
-          */
-         uint32_t hw_id;
-
          /**
           * false while in the unaccumulated_elements list, and set to
           * true when the final, end MI_RPC snapshot has been
@@ -110,27 +93,15 @@ struct brw_perf_query_object
           */
          bool results_accumulated;
 
-         /**
-          * Number of reports accumulated to produce the results.
-          */
-         uint32_t reports_accumulated;
-
          /**
           * Frequency of the GT at begin and end of the query.
           */
          uint64_t gt_frequency[2];
 
          /**
-          * Frequency in the slices of the GT at the begin and end of the
-          * query.
-          */
-         uint64_t slice_frequency[2];
-
-         /**
-          * Frequency in the unslice of the GT at the begin and end of the
-          * query.
+          * Accumulated OA results between begin and end of the query.
           */
-         uint64_t unslice_frequency[2];
+         struct gen_perf_query_result result;
       } oa;
 
       struct {
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c
index 916b14c14ba..159f31441c5 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c
@@ -34,6 +34,7 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
                                  uint8_t *data)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   const struct gen_perf_query_result *result = &obj->oa.result;
 
    switch (devinfo->gen) {
    case 7: {
@@ -45,15 +46,15 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
       assert(devinfo->is_haswell);
 
       for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
-         mdapi_data->ACounters[i] = obj->oa.accumulator[1 + i];
+         mdapi_data->ACounters[i] = result->accumulator[1 + i];
 
       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
          mdapi_data->NOACounters[i] =
-            obj->oa.accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
+            result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
       }
 
-      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
-      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
+      mdapi_data->ReportsCount = result->reports_accumulated;
+      mdapi_data->TotalTime = brw_timebase_scale(brw, result->accumulator[0]);
       mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
       mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
       return sizeof(*mdapi_data);
@@ -65,20 +66,20 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
          return 0;
 
       for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
-         mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
+         mdapi_data->OaCntr[i] = result->accumulator[2 + i];
       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
          mdapi_data->NoaCntr[i] =
-            obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
+            result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
       }
 
-      mdapi_data->ReportId = obj->oa.hw_id;
-      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
-      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
-      mdapi_data->GPUTicks = obj->oa.accumulator[1];
+      mdapi_data->ReportId = result->hw_id;
+      mdapi_data->ReportsCount = result->reports_accumulated;
+      mdapi_data->TotalTime = brw_timebase_scale(brw, result->accumulator[0]);
+      mdapi_data->GPUTicks = result->accumulator[1];
       mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
       mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
-      mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
-      mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
+      mdapi_data->SliceFrequency = (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
+      mdapi_data->UnsliceFrequency = (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
 
       return sizeof(*mdapi_data);
    }
@@ -91,20 +92,20 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
          return 0;
 
       for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
-         mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
+         mdapi_data->OaCntr[i] = result->accumulator[2 + i];
       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
          mdapi_data->NoaCntr[i] =
-            obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
+            result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
       }
 
-      mdapi_data->ReportId = obj->oa.hw_id;
-      mdapi_data->ReportsCount = obj->oa.reports_accumulated;
-      mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
-      mdapi_data->GPUTicks = obj->oa.accumulator[1];
+      mdapi_data->ReportId = result->hw_id;
+      mdapi_data->ReportsCount = result->reports_accumulated;
+      mdapi_data->TotalTime = brw_timebase_scale(brw, result->accumulator[0]);
+      mdapi_data->GPUTicks = result->accumulator[1];
       mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
       mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
-      mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
-      mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
+      mdapi_data->SliceFrequency = (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
+      mdapi_data->UnsliceFrequency = (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
 
       return sizeof(*mdapi_data);
    }
-- 
2.17.1



More information about the mesa-dev mailing list