[Mesa-dev] [PATCH 2/3] nvc0: Return value of appropriate type for performance metrics

Boyan Ding boyan.j.ding at gmail.com
Thu Jun 15 12:11:38 UTC 2017


The gallium HUD now properly handles floating point values. This change
also helps AMD_performance_monitor return correct values for non-integer
types.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Signed-off-by: Boyan Ding <boyan.j.ding at gmail.com>
---
 .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c    | 70 +++++++++++++---------
 1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index 089af61820..6d4deaf2ba 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -498,53 +498,59 @@ nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
 static uint64_t
 sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
 {
+   union pipe_query_result result;
+
+   result.u64 = 0;
    switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
    case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
       /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
       if (res64[1])
-         return ((res64[0] / (double)res64[1]) / 48) * 100;
+         result.f = ((res64[0] / (double)res64[1]) / 48) * 100;
       break;
    case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
       /* (branch / (branch + divergent_branch)) * 100 */
       if (res64[0] + res64[1])
-         return (res64[0] / (double)(res64[0] + res64[1])) * 100;
+         result.f = (res64[0] / (double)(res64[0] + res64[1])) * 100;
       break;
    case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
       /* inst_executed / warps_launched */
       if (res64[1])
-         return res64[0] / (double)res64[1];
+         result.u64 = res64[0] / (double)res64[1];
       break;
    case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
       /* (inst_issued - inst_executed) / inst_executed */
       if (res64[1])
-         return (res64[0] - res64[1]) / (double)res64[1];
+         result.u64 = (res64[0] - res64[1]) / (double)res64[1];
       break;
    case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
       /* inst_issued / active_cycles */
       if (res64[1])
-         return res64[0] / (double)res64[1];
+         result.u64 = res64[0] / (double)res64[1];
       break;
    case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
       /* ((inst_issued / 2) / active_cycles) * 100 */
       if (res64[1])
-         return ((res64[0] / 2) / (double)res64[1]) * 100;
+         result.f = ((res64[0] / 2) / (double)res64[1]) * 100;
       break;
    case NVC0_HW_METRIC_QUERY_IPC:
       /* inst_executed / active_cycles */
       if (res64[1])
-         return res64[0] / (double)res64[1];
+         result.u64 = res64[0] / (double)res64[1];
       break;
    default:
       debug_printf("invalid metric type: %d\n",
                    hq->base.type - NVC0_HW_METRIC_QUERY(0));
       break;
    }
-   return 0;
+   return result.u64;
 }
 
 static uint64_t
 sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
 {
+   union pipe_query_result result;
+
+   result.u64 = 0;
    switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
    case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
       return sm20_hw_metric_calc_result(hq, res64);
@@ -552,31 +558,31 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
       return sm20_hw_metric_calc_result(hq, res64);
    case NVC0_HW_METRIC_QUERY_INST_ISSUED:
       /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
-      return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
+      result.u64 = res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
       break;
    case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
       return sm20_hw_metric_calc_result(hq, res64);
    case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
       /* (metric-inst_issued - inst_executed) / inst_executed */
       if (res64[4])
-         return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
-                   res64[4]) / (double)res64[4]);
+         result.u64 = (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
+                         res64[4]) / (double)res64[4]);
       break;
    case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
       /* metric-inst_issued / active_cycles */
       if (res64[4])
-         return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
-                (double)res64[4];
+         result.u64 = (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
+                      (double)res64[4];
       break;
    case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
       /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
-      return res64[0] + res64[1] + res64[2] + res64[3];
+      result.u64 = res64[0] + res64[1] + res64[2] + res64[3];
       break;
    case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
       /* ((metric-issue_slots / 2) / active_cycles) * 100 */
       if (res64[4])
-         return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
-                 (double)res64[4]) * 100;
+         result.f =  (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
+                      (double)res64[4]) * 100;
       break;
    case NVC0_HW_METRIC_QUERY_IPC:
       return sm20_hw_metric_calc_result(hq, res64);
@@ -585,78 +591,86 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
                    hq->base.type - NVC0_HW_METRIC_QUERY(0));
       break;
    }
-   return 0;
+   return result.u64;
 }
 
 static uint64_t
 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
 {
+   union pipe_query_result result;
+
+   result.u64 = 0;
    switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
    case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
       /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
       if (res64[1])
-         return ((res64[0] / (double)res64[1]) / 64) * 100;
+         result.f = ((res64[0] / (double)res64[1]) / 64) * 100;
       break;
    case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
       return sm20_hw_metric_calc_result(hq, res64);
    case NVC0_HW_METRIC_QUERY_INST_ISSUED:
       /* inst_issued1 + inst_issued2 * 2 */
-      return res64[0] + res64[1] * 2;
+      result.u64 = res64[0] + res64[1] * 2;
+      break;
    case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
       return sm20_hw_metric_calc_result(hq, res64);
    case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
       /* (metric-inst_issued - inst_executed) / inst_executed */
       if (res64[2])
-         return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
+         result.u64 = (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
       break;
    case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
       /* metric-inst_issued / active_cycles */
       if (res64[2])
-         return (res64[0] + res64[1] * 2) / (double)res64[2];
+         result.u64 = (res64[0] + res64[1] * 2) / (double)res64[2];
       break;
    case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
       /* inst_issued1 + inst_issued2 */
-      return res64[0] + res64[1];
+      result.u64 = res64[0] + res64[1];
+      break;
    case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
       /* ((metric-issue_slots / 2) / active_cycles) * 100 */
       if (res64[2])
-         return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
+         result.f = (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
       break;
    case NVC0_HW_METRIC_QUERY_IPC:
       return sm20_hw_metric_calc_result(hq, res64);
    case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
       /* (shared_load_replay + shared_store_replay) / inst_executed */
       if (res64[2])
-         return (res64[0] + res64[1]) / (double)res64[2];
+         result.u64 = (res64[0] + res64[1]) / (double)res64[2];
       break;
    case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY:
       /* thread_inst_executed / (inst_executed * max. number of threads per
        * wrap) * 100 */
       if (res64[0])
-         return (res64[1] / ((double)res64[0] * 32)) * 100;
+         result.f = (res64[1] / ((double)res64[0] * 32)) * 100;
       break;
    default:
       debug_printf("invalid metric type: %d\n",
                    hq->base.type - NVC0_HW_METRIC_QUERY(0));
       break;
    }
-   return 0;
+   return result.u64;
 }
 
 static uint64_t
 sm35_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
 {
+   union pipe_query_result result;
+
+   result.u64 = 0;
    switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
    case NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY:
       /* not_predicated_off_thread_inst_executed / (inst_executed * max. number
        * of threads per wrap) * 100 */
       if (res64[0])
-         return (res64[1] / ((double)res64[0] * 32)) * 100;
+         result.f = (res64[1] / ((double)res64[0] * 32)) * 100;
       break;
    default:
       return sm30_hw_metric_calc_result(hq, res64);
    }
-   return 0;
+   return result.u64;
 }
 
 static boolean
-- 
2.13.1



More information about the mesa-dev mailing list