[Mesa-dev] [PATCH 6/7] nvc0: re-introduce performance metrics for SM30 (Kepler)
Samuel Pitoiset
samuel.pitoiset at gmail.com
Wed Dec 2 14:49:50 PST 2015
This implements more performance metrics than the previous implementation
supported, but some other metrics still need to be figured out.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
.../drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 176 ++++++++++++++++++++-
.../drivers/nouveau/nvc0/nvc0_query_hw_metric.h | 17 ++
2 files changed, 188 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index fb2806a..756ba4e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -157,6 +157,109 @@ static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
#undef _SM
#undef _M
+/* === PERFORMANCE MONITORING METRICS for NVE4+ === */
+/* Display names; entry i belongs to NVE4_HW_METRIC_QUERY(i). */
+static const char *nve4_hw_metric_names[] = {
+ "metric-achieved_occupancy",
+ "metric-branch_efficiency",
+ "metric-inst_issued",
+ "metric-inst_per_wrap",
+ "metric-inst_replay_overhead",
+ "metric-issued_ipc",
+ "metric-issue_slots",
+ "metric-issue_slot_utilization",
+ "metric-ipc",
+ "metric-shared_replay_overhead",
+};
+
+#define _SM(n) NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_ ##n)
+#define _M(n, c) [NVE4_HW_METRIC_QUERY_##n] = c
+
+/* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
+/* SM queries behind metric-achieved_occupancy. */
+static const struct nvc0_hw_metric_query_cfg sm30_achieved_occupancy =
+{
+ .queries[0] = _SM(ACTIVE_WARPS),
+ .queries[1] = _SM(ACTIVE_CYCLES),
+ .num_queries = 2,
+};
+
+/* SM queries behind metric-branch_efficiency. */
+static const struct nvc0_hw_metric_query_cfg sm30_branch_efficiency =
+{
+ .queries[0] = _SM(BRANCH),
+ .queries[1] = _SM(DIVERGENT_BRANCH),
+ .num_queries = 2,
+};
+
+/* SM queries shared by metric-inst_issued and metric-issue_slots. */
+static const struct nvc0_hw_metric_query_cfg sm30_inst_issued =
+{
+ .queries[0] = _SM(INST_ISSUED1),
+ .queries[1] = _SM(INST_ISSUED2),
+ .num_queries = 2,
+};
+
+/* SM queries behind metric-inst_per_wrap. */
+static const struct nvc0_hw_metric_query_cfg sm30_inst_per_wrap =
+{
+ .queries[0] = _SM(INST_EXECUTED),
+ .queries[1] = _SM(WARPS_LAUNCHED),
+ .num_queries = 2,
+};
+
+/* SM queries behind metric-inst_replay_overhead. */
+static const struct nvc0_hw_metric_query_cfg sm30_inst_replay_overhead =
+{
+ .queries[0] = _SM(INST_ISSUED1),
+ .queries[1] = _SM(INST_ISSUED2),
+ .queries[2] = _SM(INST_EXECUTED),
+ .num_queries = 3,
+};
+
+/* Shared by metric-issued_ipc and metric-issue_slot_utilization. */
+static const struct nvc0_hw_metric_query_cfg sm30_issued_ipc =
+{
+ .queries[0] = _SM(INST_ISSUED1),
+ .queries[1] = _SM(INST_ISSUED2),
+ .queries[2] = _SM(ACTIVE_CYCLES),
+ .num_queries = 3,
+};
+
+/* SM queries behind metric-ipc. */
+static const struct nvc0_hw_metric_query_cfg sm30_ipc =
+{
+ .queries[0] = _SM(INST_EXECUTED),
+ .queries[1] = _SM(ACTIVE_CYCLES),
+ .num_queries = 2,
+};
+
+/* SM queries behind metric-shared_replay_overhead. */
+static const struct nvc0_hw_metric_query_cfg sm30_shared_replay_overhead =
+{
+ .queries[0] = _SM(SHARED_LD_REPLAY),
+ .queries[1] = _SM(SHARED_ST_REPLAY),
+ .queries[2] = _SM(INST_EXECUTED),
+ .num_queries = 3,
+};
+
+/* Metric -> config map; the two issue-slot metrics reuse other configs. */
+static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] = {
+ _M(ACHIEVED_OCCUPANCY, &sm30_achieved_occupancy),
+ _M(BRANCH_EFFICIENCY, &sm30_branch_efficiency),
+ _M(INST_ISSUED, &sm30_inst_issued),
+ _M(INST_PER_WRAP, &sm30_inst_per_wrap),
+ _M(INST_REPLAY_OVERHEAD, &sm30_inst_replay_overhead),
+ _M(ISSUED_IPC, &sm30_issued_ipc),
+ _M(ISSUE_SLOTS, &sm30_inst_issued),
+ _M(ISSUE_SLOT_UTILIZATION, &sm30_issued_ipc),
+ _M(IPC, &sm30_ipc),
+ _M(SHARED_REPLAY_OVERHEAD, &sm30_shared_replay_overhead),
+};
+
+#undef _SM
+#undef _M
+
static inline const struct nvc0_hw_metric_query_cfg **
nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
{
@@ -175,6 +278,9 @@ nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0,
struct nvc0_screen *screen = nvc0->screen;
struct nvc0_query *q = &hq->base;
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ return sm30_hw_metric_queries[q->type - NVE4_HW_METRIC_QUERY(0)];
+
queries = nvc0_hw_metric_get_queries(screen);
return queries[q->type - NVC0_HW_METRIC_QUERY(0)];
}
@@ -309,6 +415,52 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
return 0;
}
+/* Derive the NVE4 metric selected by hq->base.type from the raw counters in
+ * res64[]; 0 on a zero divisor or unknown type, fractions are truncated. */
+static uint64_t
+sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
+{
+ switch (hq->base.type - NVE4_HW_METRIC_QUERY(0)) {
+ case NVE4_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
+ case NVE4_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+ case NVE4_HW_METRIC_QUERY_INST_PER_WRAP:
+ case NVE4_HW_METRIC_QUERY_IPC:
+ /* NOTE(review): relies on SM20 sharing these metric indices -- confirm */
+ return sm20_hw_metric_calc_result(hq, res64);
+ case NVE4_HW_METRIC_QUERY_INST_ISSUED:
+ /* inst_issued1 + inst_issued2 * 2 */
+ return res64[0] + res64[1] * 2;
+ case NVE4_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
+ /* (metric-inst_issued - inst_executed) / inst_executed */
+ if (res64[2] && res64[0] + res64[1] * 2 > res64[2]) /* guard unsigned wrap */
+ return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
+ break;
+ case NVE4_HW_METRIC_QUERY_ISSUED_IPC:
+ /* metric-inst_issued / active_cycles */
+ if (res64[2])
+ return (res64[0] + res64[1] * 2) / (double)res64[2];
+ break;
+ case NVE4_HW_METRIC_QUERY_ISSUE_SLOTS:
+ /* inst_issued1 + inst_issued2 */
+ return res64[0] + res64[1];
+ case NVE4_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
+ /* ((metric-issue_slots / 2.0) / active_cycles) * 100 */
+ if (res64[2])
+ return (((res64[0] + res64[1]) / 2.0) / res64[2]) * 100;
+ break;
+ case NVE4_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
+ /* (shared_load_replay + shared_store_replay) / inst_executed */
+ if (res64[2])
+ return (res64[0] + res64[1]) / (double)res64[2];
+ break;
+ default:
+ debug_printf("invalid metric type: %d\n",
+ hq->base.type - NVE4_HW_METRIC_QUERY(0));
+ break;
+ }
+ return 0;
+}
+
static boolean
nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
struct nvc0_hw_query *hq, boolean wait,
@@ -331,10 +483,14 @@ nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
res64[i] = *(uint64_t *)&results[i];
}
- if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
- value = sm20_hw_metric_calc_result(hq, res64);
- else
- value = sm21_hw_metric_calc_result(hq, res64);
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ value = sm30_hw_metric_calc_result(hq, res64);
+ } else {
+ if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
+ value = sm20_hw_metric_calc_result(hq, res64);
+ else
+ value = sm21_hw_metric_calc_result(hq, res64);
+ }
*(uint64_t *)result = value;
return ret;
@@ -355,7 +511,8 @@ nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
struct nvc0_hw_query *hq;
unsigned i;
- if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
+ if ((type < NVE4_HW_METRIC_QUERY(0) || type > NVE4_HW_METRIC_QUERY_LAST) &&
+ (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST))
return NULL;
hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
@@ -406,6 +563,9 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
if (screen->base.device->drm_version >= 0x01000101) {
if (screen->compute) {
+ if (screen->base.class_3d == NVE4_3D_CLASS) {
+ count += NVE4_HW_METRIC_QUERY_COUNT;
+ } else
if (class_3d < NVE4_3D_CLASS) {
const struct nvc0_hw_metric_query_cfg **queries =
nvc0_hw_metric_get_queries(screen);
@@ -424,6 +584,12 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
if (id < count) {
if (screen->compute) {
+ if (screen->base.class_3d == NVE4_3D_CLASS) {
+ info->name = nve4_hw_metric_names[id];
+ info->query_type = NVE4_HW_METRIC_QUERY(id);
+ info->group_id = -1;
+ return 1;
+ } else
if (class_3d < NVE4_3D_CLASS) {
const struct nvc0_hw_metric_query_cfg **queries =
nvc0_hw_metric_get_queries(screen);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
index 95675fd..06cb355 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
@@ -18,6 +18,23 @@ nvc0_hw_metric_query(struct nvc0_hw_query *hq)
/*
* Driver metrics queries:
*/
+/* NVE4 metric ids; enum order must match nve4_hw_metric_names[]. */
+#define NVE4_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 3072 + (i))
+#define NVE4_HW_METRIC_QUERY_LAST NVE4_HW_METRIC_QUERY(NVE4_HW_METRIC_QUERY_COUNT - 1)
+enum nve4_hw_metric_queries {
+ NVE4_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY = 0,
+ NVE4_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
+ NVE4_HW_METRIC_QUERY_INST_ISSUED,
+ NVE4_HW_METRIC_QUERY_INST_PER_WRAP,
+ NVE4_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
+ NVE4_HW_METRIC_QUERY_ISSUED_IPC,
+ NVE4_HW_METRIC_QUERY_ISSUE_SLOTS,
+ NVE4_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
+ NVE4_HW_METRIC_QUERY_IPC,
+ NVE4_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
+ NVE4_HW_METRIC_QUERY_COUNT
+};
+
#define NVC0_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 3072 + (i))
#define NVC0_HW_METRIC_QUERY_LAST NVC0_HW_METRIC_QUERY(NVC0_HW_METRIC_QUERY_COUNT - 1)
enum nvc0_hw_metric_queries
--
2.6.2
More information about the mesa-dev
mailing list