[Intel-gfx] [PATCH 3/3] intel_perf_counters: Add support for Sandybridge.
Daniel Vetter
daniel at ffwll.ch
Wed Mar 27 12:29:38 CET 2013
On Tue, Mar 26, 2013 at 10:06:39PM -0700, Kenneth Graunke wrote:
> While the Sandybridge PRM doesn't have any documentation on the GPU's
> performance counters, a lot of information can be gleaned from the older
> Ironlake PRM. Oddly, none of the information documented there actually
> appears to apply to Ironlake. However, it apparently works just great
> on Sandybridge.
>
> Since this information has all been publicly available on the internet
> for around three years, we can use it.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
Merged, thanks for the patches.
-Daniel
> ---
> tools/intel_perf_counters.c | 146 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 146 insertions(+)
>
> diff --git a/tools/intel_perf_counters.c b/tools/intel_perf_counters.c
> index fd268b1..b528361 100644
> --- a/tools/intel_perf_counters.c
> +++ b/tools/intel_perf_counters.c
> @@ -22,9 +22,21 @@
> *
> * Authors:
> * Eric Anholt <eric at anholt.net>
> + * Kenneth Graunke <kenneth at whitecape.org>
> + *
> + * While documentation for performance counters is suspiciously missing from the
> + * Sandybridge PRM, they were documented in Volume 1 Part 3 of the Ironlake PRM.
> + *
> + * A lot of the Ironlake PRM actually unintentionally documents Sandybridge
> + * due to mistakes made when updating the documentation for Gen6+. Many of
> + * these mislabeled sections carried forward to the public documentation.
> + *
> + * The Ironlake PRMs have been publicly available since 2010 and are online at:
> + * https://01.org/linuxgraphics/documentation/2010-intel-core-processor-family
> */
>
> #include <unistd.h>
> +#include <stdbool.h>
> #include <stdlib.h>
> #include <stdio.h>
> #include <err.h>
> @@ -71,6 +83,60 @@ const char *gen5_counter_names[GEN5_COUNTER_COUNT] = {
> "cycles any EU is stalled for math",
> };
>
> +#define GEN6_COUNTER_COUNT 29
> +
> +/**
> + * Sandybridge: Counter Select = 001
> + * A0 A1 A2 A3 A4 TIMESTAMP RPT_ID
> + * A5 A6 A7 A8 A9 A10 A11 A12
> + * A13 A14 A15 A16 A17 A18 A19 A20
> + * A21 A22 A23 A24 A25 A26 A27 A28
> + */
> +const int gen6_counter_format = 1;
> +
> +/**
> + * Names for aggregating counters A0-A28.
> + *
> + * While the Ironlake PRM clearly documents that there are 29 counters (A0-A28),
> + * it only lists the names for 28 of them; one is missing. However, careful
> + * examination reveals a pattern: there are five GS counters (Active, Stall,
> + * Core Stall, # threads loaded, and ready but not running time). There are
> + * also five PS counters, in the same order. But there are only four VS
> + * counters listed - the number of VS threads loaded is missing. Presumably,
> + * it exists and is counter 5, and the rest are shifted over one place.
> + */
> +const char *gen6_counter_names[GEN6_COUNTER_COUNT] = {
> + [0] = "Aggregated Core Array Active",
> + [1] = "Aggregated Core Array Stalled",
> + [2] = "Vertex Shader Active Time",
> + [3] = "Vertex Shader Stall Time",
> + [4] = "Vertex Shader Stall Time - Core Stall",
> + [5] = "# VS threads loaded",
> + [6] = "Vertex Shader Ready but not running time",
> + [7] = "Geometry Shader Active Time",
> + [8] = "Geometry Shader Stall Time",
> + [9] = "Geometry Shader Stall Time - Core Stall",
> + [10] = "# GS threads loaded",
> + [11] = "Geometry Shader ready but not running Time",
> + [12] = "Pixel Shader Active Time",
> + [13] = "Pixel Shader Stall Time",
> + [14] = "Pixel Shader Stall Time - Core Stall",
> + [15] = "# PS threads loaded",
> + [16] = "Pixel Shader ready but not running Time",
> + [17] = "Early Z Test Pixels Passing",
> + [18] = "Early Z Test Pixels Failing",
> + [19] = "Early Stencil Test Pixels Passing",
> + [20] = "Early Stencil Test Pixels Failing",
> + [21] = "Pixel Kill Count",
> + [22] = "Alpha Test Pixels Failed",
> + [23] = "Post PS Stencil Pixels Failed",
> + [24] = "Post PS Z buffer Pixels Failed",
> + [25] = "Pixels/samples Written in the frame buffer",
> + [26] = "GPU Busy",
> + [27] = "CL active and not stalled",
> + [28] = "SF active and stalled",
> +};
> +
> int have_totals = 0;
> uint32_t *totals;
> uint32_t *last_counter;
> @@ -85,6 +151,20 @@ struct intel_batchbuffer *batch;
> #define MI_COUNTER_ADDRESS_GTT (1 << 0)
> /* DW2: report ID */
>
> +/**
> + * According to the Sandybridge PRM, Volume 1, Part 1, page 48,
> + * MI_REPORT_PERF_COUNT is now opcode 0x28. The Ironlake PRM, Volume 1,
> + * Part 3 details how it works.
> + */
> +/* DW0 */
> +#define GEN6_MI_REPORT_PERF_COUNT (0x28 << 23)
> +/* DW1 and 2 are the same as above */
> +
> +/* OACONTROL exists on Gen6+ but is documented in the Ironlake PRM */
> +#define OACONTROL 0x2360
> +# define OACONTROL_COUNTER_SELECT_SHIFT 2
> +# define PERFORMANCE_COUNTER_ENABLE (1 << 0)
> +
> static void
> gen5_get_counters(void)
> {
> @@ -124,6 +204,45 @@ gen5_get_counters(void)
> drm_intel_bo_unreference(stats_bo);
> }
>
> +static void
> +gen6_get_counters(void)
> +{
> + int i;
> + drm_intel_bo *stats_bo;
> + uint32_t *stats_result;
> +
> + /* Map from counter names to their index in the buffer object */
> + static const int buffer_index[GEN6_COUNTER_COUNT] =
> + {
> + 7, 6, 5, 4, 3,
> + 15, 14, 13, 12, 11, 10, 9, 8,
> + 23, 22, 21, 20, 19, 18, 17, 16,
> + 31, 30, 29, 28, 27, 26, 25, 24,
> + };
> +
> + stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096);
> +
> + BEGIN_BATCH(3);
> + OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2));
> + OUT_RELOC(stats_bo,
> + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
> + MI_COUNTER_ADDRESS_GTT);
> + OUT_BATCH(0);
> + ADVANCE_BATCH();
> +
> + intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
> +
> + drm_intel_bo_map(stats_bo, 0);
> + stats_result = stats_bo->virtual;
> + for (i = 0; i < GEN6_COUNTER_COUNT; i++) {
> + totals[i] += stats_result[buffer_index[i]] - last_counter[i];
> + last_counter[i] = stats_result[buffer_index[i]];
> + }
> +
> + drm_intel_bo_unmap(stats_bo);
> + drm_intel_bo_unreference(stats_bo);
> +}
> +
> #define STATS_CHECK_FREQUENCY 100
> #define STATS_REPORT_FREQUENCY 2
>
> @@ -131,6 +250,7 @@ int
> main(int argc, char **argv)
> {
> uint32_t devid;
> + int counter_format;
> int counter_count;
> const char **counter_name;
> void (*get_counters)(void);
> @@ -138,6 +258,7 @@ main(int argc, char **argv)
> char clear_screen[] = {0x1b, '[', 'H',
> 0x1b, '[', 'J',
> 0x0};
> + bool oacontrol = true;
> int fd;
> int l;
>
> @@ -152,10 +273,27 @@ main(int argc, char **argv)
> counter_name = gen5_counter_names;
> counter_count = GEN5_COUNTER_COUNT;
> get_counters = gen5_get_counters;
> + oacontrol = false;
> + } else if (IS_GEN6(devid)) {
> + counter_name = gen6_counter_names;
> + counter_count = GEN6_COUNTER_COUNT;
> + counter_format = gen6_counter_format;
> + get_counters = gen6_get_counters;
> } else {
> printf("This tool is not yet supported on your platform.\n");
> abort();
> }
> +
> + if (oacontrol) {
> + /* Forcewake */
> + intel_register_access_init(intel_get_pci_device(), 0);
> +
> + /* Enable performance counters */
> + intel_register_write(OACONTROL,
> + counter_format << OACONTROL_COUNTER_SELECT_SHIFT |
> + PERFORMANCE_COUNTER_ENABLE);
> + }
> +
> totals = calloc(counter_count, sizeof(uint32_t));
> last_counter = calloc(counter_count, sizeof(uint32_t));
>
> @@ -180,6 +318,14 @@ main(int argc, char **argv)
> }
> }
>
> + if (oacontrol) {
> + /* Disable performance counters */
> + intel_register_write(OACONTROL, 0);
> +
> + /* Forcewake */
> + intel_register_access_fini();
> + }
> +
> free(totals);
> free(last_counter);
>
> --
> 1.8.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
More information about the Intel-gfx mailing list