[Mesa-dev] [PATCH 3/3] broadcom/vc4: Add support for HW perfmon
Boris Brezillon
boris.brezillon at free-electrons.com
Thu Dec 7 15:52:07 UTC 2017
The V3D engine provides several perf counters.
Implement ->get_driver_query_[group_]info() so that these counters are
exposed through the GL_AMD_performance_monitor extension.
Signed-off-by: Boris Brezillon <boris.brezillon at free-electrons.com>
---
src/gallium/drivers/vc4/vc4_context.h | 13 +++
src/gallium/drivers/vc4/vc4_job.c | 9 +-
src/gallium/drivers/vc4/vc4_query.c | 197 ++++++++++++++++++++++++++++++++--
src/gallium/drivers/vc4/vc4_screen.c | 7 ++
src/gallium/drivers/vc4/vc4_screen.h | 1 +
5 files changed, 215 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 4a1e4093f1a0..b6d9f041efc7 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -309,6 +309,11 @@ struct vc4_job {
struct vc4_job_key key;
};
+/**
+ * A kernel perfmon object together with the buffer its counter values are
+ * read back into.
+ */
+struct vc4_hwperfmon {
+ /* Identifier returned by DRM_IOCTL_VC4_PERFMON_CREATE. */
+ uint32_t id;
+ /* Destination handed to DRM_IOCTL_VC4_PERFMON_GET_VALUES as values_ptr. */
+ uint64_t counters[DRM_VC4_MAX_PERF_COUNTERS];
+};
+
struct vc4_context {
struct pipe_context base;
@@ -387,6 +392,8 @@ struct vc4_context {
struct pipe_viewport_state viewport;
struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
struct vc4_vertexbuf_stateobj vertexbuf;
+
+ struct vc4_hwperfmon *perfmon;
/** @} */
};
@@ -444,6 +451,12 @@ vc4_sampler_state(struct pipe_sampler_state *psampler)
return (struct vc4_sampler_state *)psampler;
}
+/* Driver-query hooks for the V3D perf counters, implemented in vc4_query.c
+ * and installed on the screen when the kernel supports perfmon.
+ */
+int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
+ unsigned index,
+ struct pipe_driver_query_group_info *info);
+int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_info *info);
+
struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
void *priv, unsigned flags);
void vc4_draw_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index fb0c5bbc78cf..f75a32565603 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -362,7 +362,7 @@ vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
rsc->writes++;
}
-#define MAX_CHUNKS 1
+#define MAX_CHUNKS 2
/**
* Submits the job to the kernel and then reinitializes it.
@@ -467,6 +467,13 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
submit.uniforms = (uintptr_t)job->uniforms.base;
submit.uniforms_size = cl_offset(&job->uniforms);
+ if (vc4->perfmon && screen->has_extended_cl) {
+ chunks[nchunks].perfmon.type = VC4_PERFMON_CHUNK;
+ chunks[nchunks].perfmon.id = vc4->perfmon->id;
+ chunks[nchunks].perfmon.pad = 0;
+ nchunks++;
+ }
+
if (nchunks) {
submit.flags |= VC4_SUBMIT_CL_EXTENDED;
submit.cl_chunks = (uintptr_t)chunks;
diff --git a/src/gallium/drivers/vc4/vc4_query.c b/src/gallium/drivers/vc4/vc4_query.c
index ddf8f8fb0c2c..d6b081bb15d7 100644
--- a/src/gallium/drivers/vc4/vc4_query.c
+++ b/src/gallium/drivers/vc4/vc4_query.c
@@ -32,49 +32,224 @@
struct vc4_query
{
- uint8_t pad;
+ /* Number of query types in the batch; left at 0 for non-HW queries. */
+ unsigned num_queries;
+ /* Perfmon backing a HW-counter query, NULL for every other query. */
+ struct vc4_hwperfmon *hwperfmon;
};
+/* Names of the V3D counters exposed as driver queries. The array index is
+ * the offset added to PIPE_QUERY_DRIVER_SPECIFIC to form the query type, and
+ * it is also the event number sent to the kernel when a perfmon is created.
+ */
+static const char *v3d_counter_names[] = {
+ "FEP-valid-primitives-no-rendered-pixels",
+ "FEP-valid-primitives-rendered-pixels",
+ "FEP-clipped-quads",
+ "FEP-valid-quads",
+ "TLB-quads-not-passing-stencil-test",
+ "TLB-quads-not-passing-z-and-stencil-test",
+ "TLB-quads-with-zero-coverage",
+ "TLB-quads-with-non-zero-coverage",
+ "TLB-quads-written-to-color-buffer",
+ "PTB-primitives-discarded-outside-viewport",
+ "PTB-primitives-need-clipping",
+ "PTB-primitives-discared-reversed", /* NOTE(review): "discared" looks like a typo for "discarded" */
+ "QPU-total-idle-clk-cycles",
+ "QPU-total-clk-cycles-vertex-coord-shading",
+ "QPU-total-clk-cycles-fragment-shading",
+ "QPU-total-clk-cycles-executing-valid-instr",
+ "QPU-total-clk-cycles-waiting-TMU",
+ "QPU-total-clk-cycles-waiting-scoreboard",
+ "QPU-total-clk-cycles-waiting-varyings",
+ "QPU-total-instr-cache-hit",
+ "QPU-total-instr-cache-miss",
+ "QPU-total-uniform-cache-hit",
+ "QPU-total-uniform-cache-miss",
+ "TMU-total-text-quads-processed",
+ "TMU-total-text-cache-miss",
+ "VPM-total-clk-cycles-VDW-stalled",
+ "VPM-total-clk-cycles-VCD-stalled",
+ "L2C-total-cache-hit",
+ "L2C-total-cache-miss",
+};
+
+/**
+ * Describes the single "V3D counters" query group.
+ *
+ * Returns 0 when the kernel has no perfmon support or when index names a
+ * group other than the first one. With a NULL info it returns the number of
+ * groups (1); otherwise it fills info and returns 1.
+ */
+int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
+ unsigned index,
+ struct pipe_driver_query_group_info *info)
+{
+ if (!vc4_screen(pscreen)->has_perfmon_ioctl)
+ return 0;
+
+ /* A NULL info is a request for the group count. */
+ if (info == NULL)
+ return 1;
+
+ /* Only group 0 exists. */
+ if (index != 0)
+ return 0;
+
+ info->num_queries = ARRAY_SIZE(v3d_counter_names);
+ info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS;
+ info->name = "V3D counters";
+ return 1;
+}
+
+/**
+ * Describes one V3D counter as a driver-specific query.
+ *
+ * Returns 0 when the kernel has no perfmon support or index is out of range.
+ * With a NULL info it returns the number of counters; otherwise it fills
+ * info for counter number index and returns 1.
+ */
+int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_info *info)
+{
+ if (!vc4_screen(pscreen)->has_perfmon_ioctl)
+ return 0;
+
+ /* A NULL info is a request for the counter count. */
+ if (info == NULL)
+ return ARRAY_SIZE(v3d_counter_names);
+
+ if (index >= ARRAY_SIZE(v3d_counter_names))
+ return 0;
+
+ /* The query type encodes the counter index as an offset from
+ * PIPE_QUERY_DRIVER_SPECIFIC.
+ */
+ info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
+ info->name = v3d_counter_names[index];
+ info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+ info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
+ info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
+ return 1;
+}
+
+/**
+ * Creates a query covering num_queries query types.
+ *
+ * When every type is driver-specific (>= PIPE_QUERY_DRIVER_SPECIFIC), a
+ * kernel perfmon is created for them and attached to the returned query.
+ * When none is, the query is returned without a perfmon. Mixing both kinds
+ * is rejected.
+ *
+ * Returns NULL on allocation failure, on a mixed request, when more than
+ * DRM_VC4_MAX_PERF_COUNTERS counters are requested, or when the
+ * perfmon-create ioctl fails.
+ */
static struct pipe_query *
-vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
+ unsigned *query_types)
{
+ struct vc4_context *ctx = vc4_context(pctx);
struct vc4_query *query = calloc(1, sizeof(*query));
+ /* Zero the request so no uninitialized bytes are handed to the kernel. */
+ struct drm_vc4_perfmon_create req = { 0 };
+ struct vc4_hwperfmon *hwperfmon;
+ unsigned i, nhwqueries = 0;
+ int ret;
+
+ if (!query)
+ return NULL;
+
+ for (i = 0; i < num_queries; i++) {
+ if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC)
+ nhwqueries++;
+ }
+
+ /* We can't mix HW and non-HW queries. Leave through the error path so
+ * the query allocated above is released.
+ */
+ if (nhwqueries && nhwqueries != num_queries)
+ goto err_free_query;
+
+ if (!nhwqueries)
+ return (struct pipe_query *)query;
+
+ /* One counters[] slot is read back per query, and that array only has
+ * DRM_VC4_MAX_PERF_COUNTERS entries.
+ */
+ if (num_queries > DRM_VC4_MAX_PERF_COUNTERS)
+ goto err_free_query;
+
+ hwperfmon = calloc(1, sizeof(*hwperfmon));
+ if (!hwperfmon)
+ goto err_free_query;
+
+ for (i = 0; i < num_queries; i++)
+ req.events[i] = query_types[i] - PIPE_QUERY_DRIVER_SPECIFIC;
+
+ req.ncounters = num_queries;
+ ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req);
+ if (ret)
+ goto err_free_hwperfmon;
+
+ hwperfmon->id = req.id;
+ query->hwperfmon = hwperfmon;
+ query->num_queries = num_queries;
/* Note that struct pipe_query isn't actually defined anywhere. */
return (struct pipe_query *)query;
+
+err_free_hwperfmon:
+ free(hwperfmon);
+
+err_free_query:
+ free(query);
+
+ return NULL;
+}
+
+/* Single-query entry point: a batch of one. The index argument is unused. */
+static struct pipe_query *
+vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+{
+ return vc4_create_batch_query(ctx, 1, &query_type);
}
static void
-vc4_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
+ struct vc4_context *ctx = vc4_context(pctx);
+ struct vc4_query *query = (struct vc4_query *)pquery;
+
+ if (query->hwperfmon) {
+ struct drm_vc4_perfmon_destroy req;
+
+ req.id = query->hwperfmon->id;
+ vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &req);
+ free(query->hwperfmon);
+ }
+
free(query);
}
static boolean
-vc4_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
+ struct vc4_query *query = (struct vc4_query *)pquery;
+ struct vc4_context *ctx = vc4_context(pctx);
+
+ if (!query->hwperfmon)
+ return true;
+
+ /* Only one perfmon can be activated per context. */
+ if (ctx->perfmon)
+ return false;
+
+ ctx->perfmon = query->hwperfmon;
return true;
}
static bool
-vc4_end_query(struct pipe_context *ctx, struct pipe_query *query)
+vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
+ struct vc4_query *query = (struct vc4_query *)pquery;
+ struct vc4_context *ctx = vc4_context(pctx);
+
+ if (!query->hwperfmon)
+ return true;
+
+ if (ctx->perfmon != query->hwperfmon)
+ return false;
+
+ ctx->perfmon = NULL;
return true;
}
static boolean
-vc4_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
+vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
boolean wait, union pipe_query_result *vresult)
{
- uint64_t *result = &vresult->u64;
+ struct vc4_context *ctx = vc4_context(pctx);
+ struct vc4_query *query = (struct vc4_query *)pquery;
+ struct drm_vc4_perfmon_get_values req;
+ unsigned i;
+ int ret;
+
+ if (!query->hwperfmon) {
+ vresult->u64 = 0;
+ return true;
+ }
- *result = 0;
+ req.id = query->hwperfmon->id;
+ req.values_ptr = (uintptr_t)query->hwperfmon->counters;
+ ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
+ if (ret)
+ return false;
+
+ for (i = 0; i < query->num_queries; i++)
+ vresult[i].u64 = query->hwperfmon->counters[i];
return true;
}
static void
-vc4_set_active_query_state(struct pipe_context *pipe, boolean enable)
+vc4_set_active_query_state(struct pipe_context *pctx, boolean enable)
{
+ /* Empty body: neither argument is used. */
}
@@ -82,10 +257,10 @@ void
vc4_query_init(struct pipe_context *pctx)
{
pctx->create_query = vc4_create_query;
+ pctx->create_batch_query = vc4_create_batch_query;
pctx->destroy_query = vc4_destroy_query;
pctx->begin_query = vc4_begin_query;
pctx->end_query = vc4_end_query;
pctx->get_query_result = vc4_get_query_result;
- pctx->set_active_query_state = vc4_set_active_query_state;
+ pctx->set_active_query_state = vc4_set_active_query_state;
}
-
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 4b63e940822d..2f784dda51b7 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -698,6 +698,8 @@ vc4_screen_create(int fd, struct renderonly *ro)
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_MADVISE);
screen->has_extended_cl =
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_EXTENDED_CL);
+ screen->has_perfmon_ioctl =
+ vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_PERFMON);
if (!vc4_get_chip_info(screen))
goto fail;
@@ -724,6 +726,11 @@ vc4_screen_create(int fd, struct renderonly *ro)
pscreen->get_compiler_options = vc4_screen_get_compiler_options;
pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers;
+ if (screen->has_perfmon_ioctl) {
+ pscreen->get_driver_query_group_info = vc4_get_driver_query_group_info;
+ pscreen->get_driver_query_info = vc4_get_driver_query_info;
+ }
+
return pscreen;
fail:
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 83719d88baf0..fe8f286d0da1 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -98,6 +98,7 @@ struct vc4_screen {
bool has_madvise;
bool has_tiling_ioctl;
bool has_extended_cl;
+ bool has_perfmon_ioctl;
struct vc4_simulator_file *sim_file;
};
--
2.11.0
More information about the mesa-dev
mailing list