[Mesa-dev] [PATCH 2/3] nvc0: Return value of appropriate type for performance metrics
Boyan Ding
boyan.j.ding at gmail.com
Thu Jun 15 12:11:38 UTC 2017
The gallium HUD now properly handles floating-point values, so return
metric results with their appropriate type. This also helps
AMD_performance_monitor return correct values for non-integer types.
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Signed-off-by: Boyan Ding <boyan.j.ding at gmail.com>
---
.../drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 70 +++++++++++++---------
1 file changed, 42 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index 089af61820..6d4deaf2ba 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -498,53 +498,59 @@ nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
static uint64_t
sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
{
+ union pipe_query_result result;
+
+ result.u64 = 0;
switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
/* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
if (res64[1])
- return ((res64[0] / (double)res64[1]) / 48) * 100;
+ result.f = ((res64[0] / (double)res64[1]) / 48) * 100;
break;
case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
/* (branch / (branch + divergent_branch)) * 100 */
if (res64[0] + res64[1])
- return (res64[0] / (double)(res64[0] + res64[1])) * 100;
+ result.f = (res64[0] / (double)(res64[0] + res64[1])) * 100;
break;
case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
/* inst_executed / warps_launched */
if (res64[1])
- return res64[0] / (double)res64[1];
+ result.u64 = res64[0] / (double)res64[1];
break;
case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
/* (inst_issued - inst_executed) / inst_executed */
if (res64[1])
- return (res64[0] - res64[1]) / (double)res64[1];
+ result.u64 = (res64[0] - res64[1]) / (double)res64[1];
break;
case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
/* inst_issued / active_cycles */
if (res64[1])
- return res64[0] / (double)res64[1];
+ result.u64 = res64[0] / (double)res64[1];
break;
case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
/* ((inst_issued / 2) / active_cycles) * 100 */
if (res64[1])
- return ((res64[0] / 2) / (double)res64[1]) * 100;
+ result.f = ((res64[0] / 2) / (double)res64[1]) * 100;
break;
case NVC0_HW_METRIC_QUERY_IPC:
/* inst_executed / active_cycles */
if (res64[1])
- return res64[0] / (double)res64[1];
+ result.u64 = res64[0] / (double)res64[1];
break;
default:
debug_printf("invalid metric type: %d\n",
hq->base.type - NVC0_HW_METRIC_QUERY(0));
break;
}
- return 0;
+ return result.u64;
}
static uint64_t
sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
{
+ union pipe_query_result result;
+
+ result.u64 = 0;
switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
return sm20_hw_metric_calc_result(hq, res64);
@@ -552,31 +558,31 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
return sm20_hw_metric_calc_result(hq, res64);
case NVC0_HW_METRIC_QUERY_INST_ISSUED:
/* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
- return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
+ result.u64 = res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
break;
case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
return sm20_hw_metric_calc_result(hq, res64);
case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
/* (metric-inst_issued - inst_executed) / inst_executed */
if (res64[4])
- return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
- res64[4]) / (double)res64[4]);
+ result.u64 = (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
+ res64[4]) / (double)res64[4]);
break;
case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
/* metric-inst_issued / active_cycles */
if (res64[4])
- return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
- (double)res64[4];
+ result.u64 = (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
+ (double)res64[4];
break;
case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
/* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
- return res64[0] + res64[1] + res64[2] + res64[3];
+ result.u64 = res64[0] + res64[1] + res64[2] + res64[3];
break;
case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
/* ((metric-issue_slots / 2) / active_cycles) * 100 */
if (res64[4])
- return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
- (double)res64[4]) * 100;
+ result.f = (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
+ (double)res64[4]) * 100;
break;
case NVC0_HW_METRIC_QUERY_IPC:
return sm20_hw_metric_calc_result(hq, res64);
@@ -585,78 +591,86 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
hq->base.type - NVC0_HW_METRIC_QUERY(0));
break;
}
- return 0;
+ return result.u64;
}
static uint64_t
sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
{
+ union pipe_query_result result;
+
+ result.u64 = 0;
switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
/* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
if (res64[1])
- return ((res64[0] / (double)res64[1]) / 64) * 100;
+ result.f = ((res64[0] / (double)res64[1]) / 64) * 100;
break;
case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
return sm20_hw_metric_calc_result(hq, res64);
case NVC0_HW_METRIC_QUERY_INST_ISSUED:
/* inst_issued1 + inst_issued2 * 2 */
- return res64[0] + res64[1] * 2;
+ result.u64 = res64[0] + res64[1] * 2;
+ break;
case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
return sm20_hw_metric_calc_result(hq, res64);
case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
/* (metric-inst_issued - inst_executed) / inst_executed */
if (res64[2])
- return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
+ result.u64 = (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
break;
case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
/* metric-inst_issued / active_cycles */
if (res64[2])
- return (res64[0] + res64[1] * 2) / (double)res64[2];
+ result.u64 = (res64[0] + res64[1] * 2) / (double)res64[2];
break;
case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
/* inst_issued1 + inst_issued2 */
- return res64[0] + res64[1];
+ result.u64 = res64[0] + res64[1];
+ break;
case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
/* ((metric-issue_slots / 2) / active_cycles) * 100 */
if (res64[2])
- return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
+ result.f = (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
break;
case NVC0_HW_METRIC_QUERY_IPC:
return sm20_hw_metric_calc_result(hq, res64);
case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
/* (shared_load_replay + shared_store_replay) / inst_executed */
if (res64[2])
- return (res64[0] + res64[1]) / (double)res64[2];
+ result.u64 = (res64[0] + res64[1]) / (double)res64[2];
break;
case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY:
/* thread_inst_executed / (inst_executed * max. number of threads per
* wrap) * 100 */
if (res64[0])
- return (res64[1] / ((double)res64[0] * 32)) * 100;
+ result.f = (res64[1] / ((double)res64[0] * 32)) * 100;
break;
default:
debug_printf("invalid metric type: %d\n",
hq->base.type - NVC0_HW_METRIC_QUERY(0));
break;
}
- return 0;
+ return result.u64;
}
static uint64_t
sm35_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
{
+ union pipe_query_result result;
+
+ result.u64 = 0;
switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
case NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY:
/* not_predicated_off_thread_inst_executed / (inst_executed * max. number
* of threads per wrap) * 100 */
if (res64[0])
- return (res64[1] / ((double)res64[0] * 32)) * 100;
+ result.f = (res64[1] / ((double)res64[0] * 32)) * 100;
break;
default:
return sm30_hw_metric_calc_result(hq, res64);
}
- return 0;
+ return result.u64;
}
static boolean
--
2.13.1
More information about the mesa-dev
mailing list