[Mesa-dev] [PATCH 2/2] i965: Add support for GL_AMD_performance_monitor on Ironlake.
Kenneth Graunke
kenneth at whitecape.org
Thu Apr 11 14:00:59 PDT 2013
Ironlake's counters are always enabled; userspace can simply send a
MI_REPORT_PERF_COUNT packet to take a snapshot of them. This makes it
easy to implement.
The counters are documented in the source code for the intel-gpu-tools
intel_perf_counters utility.
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_context.c | 4 +
src/mesa/drivers/dri/i965/brw_context.h | 7 +
src/mesa/drivers/dri/i965/brw_defines.h | 7 +
.../drivers/dri/i965/brw_performance_monitor.c | 372 +++++++++++++++++++++
src/mesa/drivers/dri/intel/intel_extensions.c | 3 +
6 files changed, 394 insertions(+)
create mode 100644 src/mesa/drivers/dri/i965/brw_performance_monitor.c
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index be8d630..a9c2754 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -68,6 +68,7 @@ i965_FILES = \
brw_gs_state.c \
brw_lower_texture_gradients.cpp \
brw_misc_state.c \
+ brw_performance_monitor.c \
brw_program.c \
brw_primitive_restart.c \
brw_queryobj.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index ceaf325..b8bb1b5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -415,6 +415,10 @@ brwCreateContext(int api,
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
+ if (ctx->Extensions.AMD_performance_monitor) {
+ brw_init_performance_monitors(brw);
+ }
+
return true;
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 114c369..4a203a2 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1060,6 +1060,10 @@ struct brw_context
bool begin_emitted;
} query;
+ struct {
+ uint32_t total_counter_size;
+ } perfmon;
+
int num_atoms;
const struct brw_tracked_state **atoms;
@@ -1212,6 +1216,9 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
struct gl_shader *shader,
uint32_t *surf_offsets);
+/* brw_performance_monitor.c */
+void brw_init_performance_monitors(struct brw_context *brw);
+
/* gen6_sol.c */
void
brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index a13f9dc..1fea1d8 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1649,6 +1649,13 @@ enum brw_wm_barycentric_interp_mode {
#define CMD_MI_FLUSH 0x0200
+#define GEN5_MI_REPORT_PERF_COUNT ((0x26 << 23) | (3 - 2))
+/* DW0 */
+# define GEN5_MI_COUNTER_SET_0 (0 << 6)
+# define GEN5_MI_COUNTER_SET_1 (1 << 6)
+/* DW1 */
+# define MI_COUNTER_ADDRESS_GTT (1 << 0)
+/* DW2: a user-defined report ID (written to the buffer but can be anything) */
/* Bitfields for the URB_WRITE message, DW2 of message header: */
#define URB_WRITE_PRIM_END 0x1
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
new file mode 100644
index 0000000..b351193
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -0,0 +1,372 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_performance_monitor.c
+ *
+ * Implementation of the GL_AMD_performance_monitor extension.
+ *
+ * Currently only for Ironlake.
+ */
+
+#include <limits.h>
+
+#include "main/bitset.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/performance_monitor.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+/**
+ * i965 representation of a performance monitor object.
+ *
+ * Extends the core gl_perf_monitor_object with the buffer object that the
+ * counter snapshots are written into.
+ */
+struct brw_perf_monitor_object
+{
+ /**
+ * The base class.
+ *
+ * Kept as the first member: brw_perf_monitor() turns a pointer to the base
+ * object into a pointer to this struct with a plain cast.
+ */
+ struct gl_perf_monitor_object base;
+
+ /**
+ * The GPU-facing BO, holding raw counter data in a hardware specific form.
+ *
+ * (Re)allocated each time the monitor is begun; released on reset and on
+ * delete.
+ */
+ drm_intel_bo *gpu_facing_bo;
+};
+
+/**
+ * Downcasting convenience function: view a core performance monitor object
+ * as the i965 subclass. This is an unchecked pointer cast.
+ */
+static inline struct brw_perf_monitor_object *
+brw_perf_monitor(struct gl_perf_monitor_object *m)
+{
+ return (struct brw_perf_monitor_object *) m;
+}
+
+/******************************************************************************/
+
+/**
+ * Group information:
+ * @{
+ */
+enum group_id {
+   A_COUNTERS = 0,   /**< the only group exposed so far */
+};
+
+/**
+ * Counter group table, indexed by group_id.
+ *
+ * NOTE(review): the second initializer is presumably the maximum number of
+ * simultaneously active counters (INT_MAX = no limit) -- confirm against the
+ * definition of struct gl_perf_monitor_group.
+ */
+static const struct gl_perf_monitor_group perf_groups[] = {
+   [A_COUNTERS] = { "Aggregating Counters", INT_MAX }
+};
+/** @} */
+
+
+/**
+ * Ironlake counter information:
+ *
+ * The enumerators below are listed in the same order as the entries of the
+ * gen5_counters table, so a counter's ID is also its index in that table.
+ * @{
+ */
+enum gen5_counter_id {
+ GEN5_FIRST_A_COUNTER = 0,
+
+ GEN5_CS_STARVED = GEN5_FIRST_A_COUNTER,
+ GEN5_CS_STALLED,
+ GEN5_VF_STARVED,
+ GEN5_VF_STALLED,
+ GEN5_VS_STARVED,
+ GEN5_VS_STALLED,
+ GEN5_GS_STARVED,
+ GEN5_GS_STALLED,
+ GEN5_CL_STARVED,
+ GEN5_CL_STALLED,
+ GEN5_SF_STARVED,
+ GEN5_SF_STALLED,
+ GEN5_WZ_STARVED,
+ GEN5_WZ_STALLED,
+ GEN5_Z_BUFFER_READ_WRITE,
+ GEN5_EU_ACTIVE,
+ GEN5_EU_SUSPENDED,
+ GEN5_THREADS_LOADED,
+ GEN5_FILTERING_ACTIVE,
+ GEN5_PS_EXECUTED,
+ GEN5_SUBSPANS_WRITTEN,
+ GEN5_BYTES_READ_FOR_TEXTURE_READS,
+ GEN5_TEXELS_RETURNED_FROM_SAMPLER,
+ GEN5_POLYGONS_NOT_CULLED,
+ GEN5_MASF_HAS_VALID_MESSAGE,
+ GEN5_WRITES_READS_FROM_RC,
+ GEN5_DP_READS,
+ GEN5_MASF_HAS_VALID_MESSAGE_NOT_CONSUMED_BY_SAMPLER,
+ GEN5_EU_STALLED_FOR_MATH,
+
+ GEN5_LAST_A_COUNTER = GEN5_FIRST_A_COUNTER + 28, /* == GEN5_EU_STALLED_FOR_MATH; adjust when counters are added */
+};
+/** Initializer for one 32-bit unsigned counter of the A_COUNTERS group, with value range [0, ~0]. */
+#define A_COUNTER(id, name) \
+ { \
+ .ID = id, \
+ .Name = name, \
+ .GroupID = A_COUNTERS, \
+ .Type = GL_UNSIGNED_INT, \
+ .Minimum = { .u32 = 0 }, \
+ .Maximum = { .u32 = ~0 }, \
+ }
+
+/**
+ * Ironlake counter table.
+ *
+ * Entries appear in gen5_counter_id order, so entry i describes the counter
+ * whose ID is i.
+ */
+static const struct gl_perf_monitor_counter gen5_counters[] = {
+   A_COUNTER(GEN5_CS_STARVED, "cycles the CS unit is starved"),
+   A_COUNTER(GEN5_CS_STALLED, "cycles the CS unit is stalled"),
+   A_COUNTER(GEN5_VF_STARVED, "cycles the VF unit is starved"),
+   A_COUNTER(GEN5_VF_STALLED, "cycles the VF unit is stalled"),
+   A_COUNTER(GEN5_VS_STARVED, "cycles the VS unit is starved"),
+   A_COUNTER(GEN5_VS_STALLED, "cycles the VS unit is stalled"),
+   A_COUNTER(GEN5_GS_STARVED, "cycles the GS unit is starved"),
+   A_COUNTER(GEN5_GS_STALLED, "cycles the GS unit is stalled"),
+   A_COUNTER(GEN5_CL_STARVED, "cycles the CL unit is starved"),
+   A_COUNTER(GEN5_CL_STALLED, "cycles the CL unit is stalled"),
+   A_COUNTER(GEN5_SF_STARVED, "cycles the SF unit is starved"),
+   A_COUNTER(GEN5_SF_STALLED, "cycles the SF unit is stalled"),
+   A_COUNTER(GEN5_WZ_STARVED, "cycles the WZ unit is starved"),
+   A_COUNTER(GEN5_WZ_STALLED, "cycles the WZ unit is stalled"),
+   A_COUNTER(GEN5_Z_BUFFER_READ_WRITE, "Z buffer read/write"),
+   A_COUNTER(GEN5_EU_ACTIVE, "cycles each EU was active"),
+   A_COUNTER(GEN5_EU_SUSPENDED, "cycles each EU was suspended"),
+   A_COUNTER(GEN5_THREADS_LOADED, "cycles threads loaded all EUs"),
+   A_COUNTER(GEN5_FILTERING_ACTIVE, "cycles filtering active"),
+   A_COUNTER(GEN5_PS_EXECUTED, "cycles PS threads executed"),
+   A_COUNTER(GEN5_SUBSPANS_WRITTEN, "subspans written to RC"),
+   A_COUNTER(GEN5_BYTES_READ_FOR_TEXTURE_READS, "bytes read for texture reads"),
+   A_COUNTER(GEN5_TEXELS_RETURNED_FROM_SAMPLER, "texels returned from sampler"),
+   A_COUNTER(GEN5_POLYGONS_NOT_CULLED, "polygons not culled"),
+   A_COUNTER(GEN5_MASF_HAS_VALID_MESSAGE, "clocks MASF has valid message"),
+   A_COUNTER(GEN5_WRITES_READS_FROM_RC, "64b writes/reads from RC"),
+   A_COUNTER(GEN5_DP_READS, "reads on dataport"),
+   A_COUNTER(GEN5_MASF_HAS_VALID_MESSAGE_NOT_CONSUMED_BY_SAMPLER, "clocks MASF has valid msg not consumed by sampler"),
+   A_COUNTER(GEN5_EU_STALLED_FOR_MATH, "cycles any EU is stalled for math"),
+};
+/** @} */
+
+/******************************************************************************/
+/**
+ * Emit two MI_REPORT_PERF_COUNT commands (six batch dwords in total) that
+ * report the counters into \p bo: counter set 0 at byte \p offset and
+ * counter set 1 at byte \p offset + 64.
+ *
+ * NOTE(review): the local "intel" is not referenced explicitly below;
+ * presumably the BEGIN_BATCH/OUT_BATCH/OUT_RELOC/ADVANCE_BATCH macros pick
+ * it up by name -- confirm before renaming or removing it.
+ */
+static void
+snapshot_aggregating_counters(struct brw_context *brw,
+ drm_intel_bo *bo, uint32_t offset)
+{
+ struct intel_context *intel = &brw->intel;
+
+ BEGIN_BATCH(6);
+ OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | GEN5_MI_COUNTER_SET_0);
+ OUT_RELOC(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ offset);
+ OUT_BATCH(0); /* third dword of the command, left as 0 */
+
+ OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | GEN5_MI_COUNTER_SET_1);
+ OUT_RELOC(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ offset + 64);
+ OUT_BATCH(0); /* third dword of the command, left as 0 */
+ ADVANCE_BATCH();
+}
+
+/**
+ * Tell whether any counter in the aggregating ("A") counter range is
+ * selected on monitor \p m.
+ *
+ * Only gen 5 is handled. Any other generation trips the assertion and, when
+ * assertions are compiled out, yields false.
+ */
+static bool
+aggregating_counters_needed(struct brw_context *brw,
+                            struct gl_perf_monitor_object *m)
+{
+   if (brw->intel.gen != 5) {
+      assert(!"Unsupported generation in performance counter code.");
+      return false;
+   }
+
+   return BITSET_TEST_RANGE(m->ActiveCounters,
+                            GEN5_FIRST_A_COUNTER, GEN5_LAST_A_COUNTER);
+}
+
+/******************************************************************************/
+
+/**
+ * Create a new performance monitor object.
+ *
+ * The object comes zero-filled from calloc(). The calloc() result is
+ * returned unchecked, so the caller receives NULL if the allocation fails.
+ */
+static struct gl_perf_monitor_object *
+brw_new_perf_monitor()
+{
+ return calloc(1, sizeof(struct brw_perf_monitor_object));
+}
+
+/**
+ * Destroy a performance monitor object: drop the reference on its
+ * GPU-facing BO, if it has one, then free the object itself.
+ */
+static void
+brw_delete_perf_monitor(struct gl_perf_monitor_object *m)
+{
+   struct brw_perf_monitor_object *obj = brw_perf_monitor(m);
+   drm_intel_bo *bo = obj->gpu_facing_bo;
+
+   if (bo != NULL) {
+      drm_intel_bo_unreference(bo);
+   }
+
+   free(obj);
+}
+
+/**
+ * Driver hook for glBeginPerformanceMonitorAMD().
+ *
+ * Discards the results of any earlier run, allocates a fresh GPU-facing BO
+ * and records the opening counter snapshot at offset 0 of it.
+ *
+ * If the BO cannot be allocated, the monitor is left inactive with no BO
+ * instead of emitting a relocation against a NULL buffer.
+ */
+static void
+brw_begin_perf_monitor(struct gl_context *ctx,
+                       struct gl_perf_monitor_object *m)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = &brw->intel;
+   struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
+
+   /* If the GPU-facing BO already exists, throw it away.  It contains
+    * old results and we're not interested in those any more.
+    */
+   if (monitor->gpu_facing_bo)
+      drm_intel_bo_unreference(monitor->gpu_facing_bo);
+
+   /* Create a new GPU-facing BO */
+   monitor->gpu_facing_bo =
+      drm_intel_bo_alloc(intel->bufmgr, "performance monitor", 4096, 1);
+
+   /* Only mark the monitor active when there is a BO to snapshot into. */
+   m->Active = monitor->gpu_facing_bo != NULL;
+   if (!m->Active)
+      return;
+
+   /* Take a snapshot of all active counters */
+   if (aggregating_counters_needed(brw, m)) {
+      snapshot_aggregating_counters(brw, monitor->gpu_facing_bo, 0);
+   }
+}
+
+/**
+ * Driver hook for glEndPerformanceMonitorAMD().
+ *
+ * Records the closing counter snapshot perfmon.total_counter_size bytes into
+ * the monitor's BO, i.e. after the opening snapshot written at offset 0.
+ */
+static void
+brw_end_perf_monitor(struct gl_context *ctx,
+                     struct gl_perf_monitor_object *m)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
+   const uint32_t end_offset = brw->perfmon.total_counter_size;
+
+   if (!aggregating_counters_needed(brw, m))
+      return;
+
+   snapshot_aggregating_counters(brw, monitor->gpu_facing_bo, end_offset);
+}
+
+/**
+ * Reset a performance monitor: drop whatever results it holds and, if the
+ * monitor is currently active, start a new measurement right away.
+ */
+static void
+brw_reset_perf_monitor(struct gl_context *ctx,
+                       struct gl_perf_monitor_object *m)
+{
+   struct brw_perf_monitor_object *obj = brw_perf_monitor(m);
+   drm_intel_bo *stale = obj->gpu_facing_bo;
+
+   if (stale != NULL) {
+      obj->gpu_facing_bo = NULL;
+      drm_intel_bo_unreference(stale);
+   }
+
+   if (!m->Active)
+      return;
+
+   brw_begin_perf_monitor(ctx, m);
+}
+
+/**
+ * Is a performance monitor result available?
+ *
+ * True only when the monitor is not active, it owns a GPU-facing BO, and
+ * drm_intel_bo_busy() reports that BO as not busy.
+ */
+static GLboolean
+brw_is_perf_monitor_result_available(struct gl_perf_monitor_object *m)
+{
+   struct brw_perf_monitor_object *obj = brw_perf_monitor(m);
+
+   if (m->Active)
+      return false;
+
+   if (!obj->gpu_facing_bo)
+      return false;
+
+   return !drm_intel_bo_busy(obj->gpu_facing_bo);
+}
+
+/**
+ * Get the performance monitor result.
+ *
+ * Writes one <group ID, counter ID, value> record per active counter into
+ * \p data, where value is the difference between the closing and opening
+ * snapshots held in the GPU-facing BO.
+ *
+ * \param ctx            GL context owning the counter tables
+ * \param m              monitor to read; must have a GPU-facing BO
+ * \param data_size      capacity of \p data in bytes; a record that would
+ *                       not fit entirely is not written and output stops
+ * \param data           destination array
+ * \param bytes_written  if non-NULL, receives the number of bytes stored
+ *                       (0 if the BO could not be mapped)
+ */
+static void
+brw_get_perf_monitor_result(struct gl_context *ctx,
+                            struct gl_perf_monitor_object *m,
+                            GLsizei data_size,
+                            GLuint *data,
+                            GLint *bytes_written)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
+
+   /* This hook should only be called when results are available. */
+   assert(monitor->gpu_facing_bo != NULL);
+
+   if (drm_intel_bo_map(monitor->gpu_facing_bo, false) != 0) {
+      if (bytes_written)
+         *bytes_written = 0;
+      return;
+   }
+   const unsigned *gpu_bo = monitor->gpu_facing_bo->virtual;
+
+   /* gpu_bo is indexed in dwords, whereas total_counter_size (the offset of
+    * the closing snapshot) is kept in bytes, so convert it once here.
+    */
+   const uint32_t end_snapshot_dwords =
+      brw->perfmon.total_counter_size / sizeof(uint32_t);
+
+   /* Number of whole dwords the caller's buffer can hold. */
+   const GLsizei capacity_dwords =
+      data_size > 0 ? data_size / (GLsizei) sizeof(GLuint) : 0;
+
+   /* Copy data from the GPU-facing BO to the supplied array.
+    *
+    * The output data format is: <group ID, counter ID, value> for each
+    * active counter.  The API allows counters to appear in any order.
+    */
+   GLsizei offset = 0;
+   for (int i = 0; i < ctx->PerfMonitor.NumCounters; i++) {
+      const struct gl_perf_monitor_counter *c = &ctx->PerfMonitor.Counters[i];
+
+      if (!BITSET_TEST(m->ActiveCounters, i))
+         continue;
+
+      const GLsizei value_dwords =
+         _mesa_perf_monitor_counter_size(c) / sizeof(uint32_t);
+
+      /* Never write past the end of the caller's buffer. */
+      if (offset + 2 + value_dwords > capacity_dwords)
+         break;
+
+      data[offset++] = c->GroupID;
+      data[offset++] = c->ID;
+
+      /* Skip REPORT_ID and TIMESTAMP fields.
+       *
+       * NOTE(review): this assumes three header dwords followed by the
+       * counters in ID order within each snapshot -- confirm against the
+       * hardware report layout.
+       */
+      const uint32_t begin_index = 3 + c->ID;
+      const uint32_t end_index = end_snapshot_dwords + begin_index;
+
+      /* Won't work for uint64_t values, but we don't have any */
+      data[offset] = gpu_bo[end_index] - gpu_bo[begin_index];
+      offset += value_dwords;
+   }
+
+   drm_intel_bo_unmap(monitor->gpu_facing_bo);
+
+   if (bytes_written)
+      *bytes_written = offset * sizeof(uint32_t);
+}
+
+/**
+ * Install the performance monitor driver hooks and, on gen 5, publish the
+ * group and counter tables and compute the byte size of one counter
+ * snapshot (three header dwords plus one dword per counter).
+ */
+void
+brw_init_performance_monitors(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+
+   /* Object lifetime hooks. */
+   ctx->Driver.NewPerfMonitor = brw_new_perf_monitor;
+   ctx->Driver.DeletePerfMonitor = brw_delete_perf_monitor;
+
+   /* Measurement control hooks. */
+   ctx->Driver.BeginPerfMonitor = brw_begin_perf_monitor;
+   ctx->Driver.EndPerfMonitor = brw_end_perf_monitor;
+   ctx->Driver.ResetPerfMonitor = brw_reset_perf_monitor;
+
+   /* Result retrieval hooks. */
+   ctx->Driver.IsPerfMonitorResultAvailable = brw_is_perf_monitor_result_available;
+   ctx->Driver.GetPerfMonitorResult = brw_get_perf_monitor_result;
+
+   if (brw->intel.gen != 5)
+      return;
+
+   ctx->PerfMonitor.Groups = perf_groups;
+   ctx->PerfMonitor.NumGroups = ARRAY_SIZE(perf_groups);
+
+   ctx->PerfMonitor.Counters = gen5_counters;
+   ctx->PerfMonitor.NumCounters = ARRAY_SIZE(gen5_counters);
+
+   brw->perfmon.total_counter_size =
+      (3 + ctx->PerfMonitor.NumCounters) * sizeof(uint32_t);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 1ad728a..3ef7fb9 100755
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -111,6 +111,9 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_texture_storage_multisample = true;
}
+ if (intel->gen == 5)
+ ctx->Extensions.AMD_performance_monitor = true;
+
if (intel->gen >= 5) {
ctx->Extensions.ARB_texture_query_lod = true;
ctx->Extensions.EXT_timer_query = true;
--
1.8.2.1
More information about the mesa-dev
mailing list