[Pixman] [PATCH 4/8] perfstat: Add performance statistics analyzer
Taekyun Kim
podain77 at gmail.com
Mon Sep 19 07:08:22 PDT 2011
From: Taekyun Kim <tkq.kim at samsung.com>
The performance statistics analyzer gathers performance data for
each composite path and reports the results at the end of the
program. This is useful for profiling other applications' use of
pixman. The report includes the number of images, scanlines and pixels
composited and the total time spent in each path.
You can turn on the perfstat analyzer by setting the environment variable
"PIXMAN_ENABLE_PERFSTAT".
---
pixman/Makefile.sources | 1 +
pixman/pixman-perf-stat.c | 541 +++++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-private.h | 46 ++++
3 files changed, 588 insertions(+), 0 deletions(-)
create mode 100644 pixman/pixman-perf-stat.c
diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index ca3f001..24be76f 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -24,6 +24,7 @@ libpixman_sources = \
pixman-timer.c \
pixman-trap.c \
pixman-utils.c \
+ pixman-perf-stat.c \
$(NULL)
libpixman_headers = \
diff --git a/pixman/pixman-perf-stat.c b/pixman/pixman-perf-stat.c
new file mode 100644
index 0000000..2986777
--- /dev/null
+++ b/pixman/pixman-perf-stat.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright © 2011 SCore Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka at nokia.com)
+ * Author: Taekyun Kim (podain77 at gmail.com)
+ */
+
+/* Performance statistics analyzer.
+ * This tool accumulates performance data for each composite path and
+ * reports the result at the end of the program.
+ *
+ * TODO: Guard against tool chains that do not support the constructor attribute
+ * TODO: Use a debug channel rather than stdout
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef HAVE_GETTIMEOFDAY
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+
+/* Capacity of the statistics table.  Once it is full, operations that
+ * would need a new entry are dropped; existing entries keep accumulating.
+ */
+#define PERFSTAT_MAX_ENTRIES 1024
+/* Stream used by the report's fprintf calls. */
+#define PERFSTAT_LOG_CHANNEL stdout
+
+/* Kind of operation an entry describes; selects which member of
+ * perfstat_entry.api is meaningful.
+ *
+ * No comma after the last enumerator: a trailing comma is not accepted
+ * by strict C90 compilers.
+ */
+typedef enum
+{
+    PERFSTAT_COMPOSITE,
+    PERFSTAT_FILL,
+    PERFSTAT_BLT
+} perfstat_type_t;
+
+/* Forward typedefs for the record types defined below. */
+typedef struct perfstat_entry perfstat_entry_t;
+typedef struct perfstat_composite perfstat_composite_t;
+typedef struct perfstat_fill perfstat_fill_t;
+typedef struct perfstat_blt perfstat_blt_t;
+
+/* Key identifying one kind of composite operation.  Two composite calls
+ * are accumulated into the same entry only when every field here matches.
+ */
+struct perfstat_composite
+{
+ pixman_op_t op;
+ pixman_format_code_t src;
+ /* Also decoded for the "transform" and "repeat" columns of the report. */
+ uint32_t src_flags;
+ pixman_format_code_t mask;
+ uint32_t mask_flags;
+ pixman_format_code_t dest;
+ uint32_t dest_flags;
+};
+
+/* Key identifying one kind of fill operation. */
+struct perfstat_fill
+{
+ /* presumably bits per pixel of the filled image -- TODO confirm against callers */
+ int32_t bpp;
+};
+
+/* Key identifying one kind of blt operation. */
+struct perfstat_blt
+{
+ /* presumably bits per pixel of source and destination -- TODO confirm against callers */
+ int32_t src_bpp;
+ int32_t dst_bpp;
+};
+
+/* One row of the statistics table: the key describing an operation
+ * (type, implementation and the per-type details in "api") together with
+ * the totals accumulated for every call that matched that key.
+ */
+struct perfstat_entry
+{
+ /* Selects which member of "api" is in use. */
+ perfstat_type_t type;
+ /* Implementation (backend) that was reported for the operation. */
+ pixman_implementation_type_t imp_type;
+
+ /* API info. */
+ union
+ {
+ perfstat_composite_t composite;
+ perfstat_fill_t fill;
+ perfstat_blt_t blt;
+ } api;
+
+ /* Performance data. */
+ /* Number of calls recorded. */
+ uint32_t images_count;
+ /* Sum of the "height" arguments. */
+ uint32_t scanlines_count;
+ /* Sum of width * height. */
+ uint64_t pixels_count;
+ /* Sum of the "elapsed" arguments (seconds, going by the report header). */
+ double time;
+};
+
+/* Held by the perfstat_add_* functions while they read or update the table. */
+static pixman_mutex_t perfstat_mutex;
+/* Statistics table; the first perfstat_entry_count slots are in use. */
+static perfstat_entry_t perfstat[PERFSTAT_MAX_ENTRIES];
+static int perfstat_entry_count = 0;
+/* Gate checked by perfstat_add_* and by the report; set to TRUE only by
+ * perfstat_constructor. */
+static pixman_bool_t enable_perfstat = FALSE;
+
+/* Return a timestamp in seconds as a double.
+ *
+ * With HAVE_GETTIMEOFDAY the value comes from gettimeofday() (seconds plus
+ * microseconds); otherwise it is clock() scaled by CLOCKS_PER_SEC.
+ */
+double
+perfstat_get_time (void)
+{
+#ifdef HAVE_GETTIMEOFDAY
+    struct timeval now;
+    int64_t usec;
+
+    gettimeofday (&now, NULL);
+
+    /* Combine in 64-bit integer microseconds before converting. */
+    usec = (int64_t)now.tv_sec * 1000000 + now.tv_usec;
+    return (double)usec / 1000000.;
+#else
+    clock_t ticks = clock ();
+
+    return (double)ticks / (double)CLOCKS_PER_SEC;
+#endif
+}
+
+/* Record one composite operation.
+ *
+ * Does nothing when statistics are disabled or when the source or mask
+ * format is PIXMAN_unknown.  Otherwise the call is added to the entry whose
+ * implementation, operator, formats and flags all match, or a new entry is
+ * created.  If the table is already full the call is dropped.
+ *
+ * width/height feed the scanline and pixel totals; elapsed is added to
+ * the entry's accumulated time.
+ */
+void
+perfstat_add_composite (pixman_implementation_type_t imp_type,
+                        pixman_op_t op,
+                        pixman_format_code_t src,
+                        uint32_t src_flags,
+                        pixman_format_code_t mask,
+                        uint32_t mask_flags,
+                        pixman_format_code_t dest,
+                        uint32_t dest_flags,
+                        int32_t width,
+                        int32_t height,
+                        double elapsed)
+{
+    perfstat_entry_t *entry;
+    perfstat_entry_t *end;
+    const perfstat_composite_t *key;
+
+    if (src == PIXMAN_unknown || mask == PIXMAN_unknown || !enable_perfstat)
+        return;
+
+    PIXMAN_MUTEX_LOCK (perfstat_mutex);
+
+    /* Look for an existing entry with the same key. */
+    end = perfstat + perfstat_entry_count;
+    for (entry = perfstat; entry != end; ++entry)
+    {
+        if (entry->type != PERFSTAT_COMPOSITE)
+            continue;
+        if (entry->imp_type != imp_type)
+            continue;
+
+        key = &entry->api.composite;
+        if (key->op == op &&
+            key->src == src &&
+            key->src_flags == src_flags &&
+            key->mask == mask &&
+            key->mask_flags == mask_flags &&
+            key->dest == dest &&
+            key->dest_flags == dest_flags)
+        {
+            break;
+        }
+    }
+
+    if (entry != end)
+    {
+        /* Known key: accumulate. */
+        entry->images_count++;
+        entry->scanlines_count += height;
+        entry->pixels_count += (uint64_t)width * height;
+        entry->time += elapsed;
+    }
+    else if (perfstat_entry_count < PERFSTAT_MAX_ENTRIES)
+    {
+        /* New key and room left: "entry" is the first free slot. */
+        entry->type = PERFSTAT_COMPOSITE;
+        entry->imp_type = imp_type;
+        entry->api.composite.op = op;
+        entry->api.composite.src = src;
+        entry->api.composite.src_flags = src_flags;
+        entry->api.composite.mask = mask;
+        entry->api.composite.mask_flags = mask_flags;
+        entry->api.composite.dest = dest;
+        entry->api.composite.dest_flags = dest_flags;
+
+        entry->images_count = 1;
+        entry->scanlines_count = height;
+        entry->pixels_count = (uint64_t)width * height;
+        entry->time = elapsed;
+
+        perfstat_entry_count++;
+    }
+    /* else: table full, the sample is dropped. */
+
+    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+}
+
+/* Record one fill operation.
+ *
+ * Does nothing when statistics are disabled.  Otherwise the call is added
+ * to the fill entry with the same implementation and bpp, or a new entry is
+ * created.  If the table is already full the call is dropped.
+ */
+void
+perfstat_add_fill (pixman_implementation_type_t imp_type,
+                   int32_t bpp,
+                   int32_t width,
+                   int32_t height,
+                   double elapsed)
+{
+    perfstat_entry_t *entry;
+    perfstat_entry_t *end;
+
+    if (!enable_perfstat)
+        return;
+
+    PIXMAN_MUTEX_LOCK (perfstat_mutex);
+
+    /* Look for an existing entry with the same key. */
+    end = perfstat + perfstat_entry_count;
+    for (entry = perfstat; entry != end; ++entry)
+    {
+        if (entry->type == PERFSTAT_FILL &&
+            entry->imp_type == imp_type &&
+            entry->api.fill.bpp == bpp)
+        {
+            break;
+        }
+    }
+
+    if (entry != end)
+    {
+        /* Known key: accumulate. */
+        entry->images_count++;
+        entry->scanlines_count += height;
+        entry->pixels_count += (uint64_t)width * height;
+        entry->time += elapsed;
+    }
+    else if (perfstat_entry_count < PERFSTAT_MAX_ENTRIES)
+    {
+        /* New key and room left: "entry" is the first free slot. */
+        entry->type = PERFSTAT_FILL;
+        entry->imp_type = imp_type;
+        entry->api.fill.bpp = bpp;
+
+        entry->images_count = 1;
+        entry->scanlines_count = height;
+        entry->pixels_count = (uint64_t)width * height;
+        entry->time = elapsed;
+
+        perfstat_entry_count++;
+    }
+    /* else: table full, the sample is dropped. */
+
+    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+}
+
+/* Record one blt operation.
+ *
+ * Does nothing when statistics are disabled.  Otherwise the call is added
+ * to the blt entry with the same implementation, src_bpp and dst_bpp, or a
+ * new entry is created.  If the table is already full the call is dropped.
+ */
+void
+perfstat_add_blt (pixman_implementation_type_t imp_type,
+                  int32_t src_bpp,
+                  int32_t dst_bpp,
+                  int32_t width,
+                  int32_t height,
+                  double elapsed)
+{
+    perfstat_entry_t *entry;
+    perfstat_entry_t *end;
+
+    if (!enable_perfstat)
+        return;
+
+    PIXMAN_MUTEX_LOCK (perfstat_mutex);
+
+    /* Look for an existing entry with the same key. */
+    end = perfstat + perfstat_entry_count;
+    for (entry = perfstat; entry != end; ++entry)
+    {
+        if (entry->type == PERFSTAT_BLT &&
+            entry->imp_type == imp_type &&
+            entry->api.blt.src_bpp == src_bpp &&
+            entry->api.blt.dst_bpp == dst_bpp)
+        {
+            break;
+        }
+    }
+
+    if (entry != end)
+    {
+        /* Known key: accumulate. */
+        entry->images_count++;
+        entry->scanlines_count += height;
+        entry->pixels_count += (uint64_t)width * height;
+        entry->time += elapsed;
+    }
+    else if (perfstat_entry_count < PERFSTAT_MAX_ENTRIES)
+    {
+        /* New key and room left: "entry" is the first free slot. */
+        entry->type = PERFSTAT_BLT;
+        entry->imp_type = imp_type;
+        entry->api.blt.src_bpp = src_bpp;
+        entry->api.blt.dst_bpp = dst_bpp;
+
+        entry->images_count = 1;
+        entry->scanlines_count = height;
+        entry->pixels_count = (uint64_t)width * height;
+        entry->time = elapsed;
+
+        perfstat_entry_count++;
+    }
+    /* else: table full, the sample is dropped. */
+
+    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+}
+
+#ifdef PIXMAN_ENABLE_PERFSTAT
+
+/* qsort() comparator ordering entries by accumulated time, largest first. */
+static int
+compare (const void *p1, const void *p2)
+{
+    double lhs = ((const perfstat_entry_t *)p1)->time;
+    double rhs = ((const perfstat_entry_t *)p2)->time;
+
+    if (lhs > rhs)
+        return -1;
+
+    if (lhs == rhs)
+        return 0;
+
+    return 1;
+}
+
+/* Map an implementation type to the label printed in the report's
+ * "backend" column.  Values without a dedicated case fall through to the
+ * label returned after the switch.
+ */
+static const char*
+get_implementation_string (pixman_implementation_type_t imp_type)
+{
+ switch (imp_type)
+ {
+ case PIXMAN_IMPLEMENTATION_UNKNOWN:
+ return " Unknown";
+ case PIXMAN_IMPLEMENTATION_GENERAL:
+ return " General";
+ case PIXMAN_IMPLEMENTATION_C_FAST_PATH:
+ return " C fast";
+ case PIXMAN_IMPLEMENTATION_SSE2:
+ return " SSE2";
+ case PIXMAN_IMPLEMENTATION_MMX:
+ return " MMX";
+ case PIXMAN_IMPLEMENTATION_ARM_SIMD:
+ return "ARM SIMD";
+ case PIXMAN_IMPLEMENTATION_ARM_NEON:
+ return "ARM NEON";
+ default:
+ break;
+ }
+
+ return "Unknown ";
+}
+
+/* Map a compositing operator to the label printed in the report's "op"
+ * column.  Operators without a dedicated case are reported as " etc".
+ */
+static const char*
+get_op_string (pixman_op_t op)
+{
+ switch (op)
+ {
+ case PIXMAN_OP_CLEAR:
+ return " clear";
+ case PIXMAN_OP_SRC:
+ return " src";
+ case PIXMAN_OP_DST:
+ return " dst";
+ case PIXMAN_OP_OVER:
+ return " over";
+ case PIXMAN_OP_OVER_REVERSE:
+ return "over_rev";
+ case PIXMAN_OP_IN:
+ return " in";
+ case PIXMAN_OP_IN_REVERSE:
+ return " in_rev";
+ case PIXMAN_OP_OUT:
+ return " out";
+ case PIXMAN_OP_OUT_REVERSE:
+ return " out_rev";
+ case PIXMAN_OP_ATOP:
+ return " atop";
+ case PIXMAN_OP_ATOP_REVERSE:
+ return "atop_rev";
+ case PIXMAN_OP_XOR:
+ return " xor";
+ case PIXMAN_OP_ADD:
+ return " add";
+ case PIXMAN_OP_SATURATE:
+ return " sat";
+ default:
+ return " etc";
+ };
+}
+
+/* Map a format code to the short label printed in the report's src, mask
+ * and dst columns.  Formats without a dedicated case are reported as
+ * " etc".
+ */
+static const char*
+get_format_string (pixman_format_code_t format)
+{
+ /* The switch is on uint32_t, not the enum type, and also handles the
+  * PIXMAN_null/solid/pixbuf/rpixbuf/any pseudo-format constants. */
+ switch ((uint32_t)format)
+ {
+ case PIXMAN_null:
+ return "null";
+ case PIXMAN_solid:
+ return " n";
+ case PIXMAN_pixbuf:
+ return " pix";
+ case PIXMAN_rpixbuf:
+ return "rpix";
+ case PIXMAN_any:
+ return " any";
+ case PIXMAN_a1:
+ return " 1";
+ case PIXMAN_a8:
+ return " 8";
+ case PIXMAN_r5g6b5:
+ return "0565";
+ case PIXMAN_r8g8b8:
+ return "0888";
+ case PIXMAN_x8r8g8b8:
+ return "x888";
+ case PIXMAN_a8r8g8b8:
+ return "8888";
+ default:
+ return " etc";
+ }
+}
+
+/* Map image flags to the label printed in the report's "transform"
+ * column.  Tests are made in priority order: identity, scale (split by
+ * nearest/bilinear filter), then the three rotations.  Anything else,
+ * including a scale with neither filter flag set, is reported as
+ * " affine".
+ */
+static const char*
+get_transform_string (uint32_t flag)
+{
+    if (flag & FAST_PATH_ID_TRANSFORM)
+        return "non-scaled";
+
+    if (flag & FAST_PATH_SCALE_TRANSFORM)
+    {
+        if (flag & FAST_PATH_NEAREST_FILTER)
+            return " nearest";
+
+        if (flag & FAST_PATH_BILINEAR_FILTER)
+            return " bilinear";
+
+        /* Scaled with some other filter. */
+        return " affine";
+    }
+
+    if (flag & FAST_PATH_ROTATE_90_TRANSFORM)
+        return " rotate_90";
+
+    if (flag & FAST_PATH_ROTATE_180_TRANSFORM)
+        return "rotate_180";
+
+    if (flag & FAST_PATH_ROTATE_270_TRANSFORM)
+        return "rotate_270";
+
+    return " affine";
+}
+
+/* Map image flags to the label printed in the report's "repeat" column.
+ * SAMPLES_COVER_CLIP takes priority; each repeat mode is recognised only
+ * when all bits of its FAST_PATH_*_REPEAT mask are set.
+ */
+static const char*
+get_repeat_string (uint32_t flag)
+{
+    if (flag & FAST_PATH_SAMPLES_COVER_CLIP)
+        return " cover";
+
+    if ((flag & FAST_PATH_NORMAL_REPEAT) == FAST_PATH_NORMAL_REPEAT)
+        return " normal";
+
+    if ((flag & FAST_PATH_PAD_REPEAT) == FAST_PATH_PAD_REPEAT)
+        return " pad";
+
+    if ((flag & FAST_PATH_NONE_REPEAT) == FAST_PATH_NONE_REPEAT)
+        return " none";
+
+    if ((flag & FAST_PATH_REFLECT_REPEAT) == FAST_PATH_REFLECT_REPEAT)
+        return "reflect";
+
+    return "unknown";
+}
+
+/* Return the current value of enable_perfstat, the flag that
+ * perfstat_constructor sets from the environment.
+ */
+pixman_bool_t
+perfstat_is_enabled (void)
+{
+ return enable_perfstat;
+}
+
+/* Constructor (GCC "constructor" attribute): enables statistics
+ * collection when requested through the environment and initialises the
+ * mutex protecting the table.
+ *
+ * The documented switch is PIXMAN_ENABLE_PERFSTAT.  The older spelling
+ * PIXMAN_ENABLE_PERF_STAT is still honoured so existing setups keep
+ * working.  Only the presence of the variable matters, not its value.
+ */
+static void __attribute__((constructor))
+perfstat_constructor (void)
+{
+    if (getenv ("PIXMAN_ENABLE_PERFSTAT") != NULL ||
+        getenv ("PIXMAN_ENABLE_PERF_STAT") != NULL)
+    {
+        enable_perfstat = TRUE;
+    }
+
+    PIXMAN_MUTEX_INIT (perfstat_mutex);
+}
+
+/* Destructor (GCC "destructor" attribute): tears down the mutex and, when
+ * statistics are enabled, prints the report to PERFSTAT_LOG_CHANNEL.
+ *
+ * Entries are sorted by accumulated time, largest first.  One row is
+ * printed per entry, followed by the grand totals.
+ */
+static void __attribute__((destructor))
+perfstat_destructor (void)
+{
+    int i;
+    /* 64-bit totals: summing up to PERFSTAT_MAX_ENTRIES 32-bit counters
+     * can exceed 32 bits. */
+    uint64_t total_images = 0;
+    uint64_t total_scanlines = 0;
+    uint64_t total_pixels = 0;
+    double total_time = 0.0;
+
+    PIXMAN_MUTEX_FINI (perfstat_mutex);
+
+    if (!enable_perfstat)
+        return;
+
+    /* sort entries */
+    qsort (perfstat, perfstat_entry_count, sizeof(perfstat[0]), compare);
+
+    fprintf (PERFSTAT_LOG_CHANNEL,
+             "[ #] function transform op src mask dst repeat "
+             "images scanlines pixels(Mpix) time(s) speed(Mpix/s) backend\n");
+
+    /* show performance statistics */
+    for (i = 0; i < perfstat_entry_count; ++i)
+    {
+        const perfstat_entry_t *entry = &perfstat[i];
+        double speed = 0.0;
+
+        /* A coarse timer can report zero elapsed time; do not divide by it. */
+        if (entry->time > 0.0)
+            speed = (entry->pixels_count / entry->time) / 1000000.0;
+
+        fprintf (PERFSTAT_LOG_CHANNEL, "[%4d] ", i);
+
+        if (entry->type == PERFSTAT_COMPOSITE)
+        {
+            fprintf (PERFSTAT_LOG_CHANNEL,
+                     "composite : "
+                     "%s %s %s %s %s %s ",
+                     get_transform_string (entry->api.composite.src_flags),
+                     get_op_string (entry->api.composite.op),
+                     get_format_string (entry->api.composite.src),
+                     get_format_string (entry->api.composite.mask),
+                     get_format_string (entry->api.composite.dest),
+                     get_repeat_string (entry->api.composite.src_flags));
+        }
+        else if (entry->type == PERFSTAT_FILL)
+        {
+            fprintf (PERFSTAT_LOG_CHANNEL,
+                     " fill : "
+                     " %4d ",
+                     (int)entry->api.fill.bpp);
+        }
+        else if (entry->type == PERFSTAT_BLT)
+        {
+            fprintf (PERFSTAT_LOG_CHANNEL,
+                     " blt : "
+                     " %4d %4d ",
+                     (int)entry->api.blt.src_bpp,
+                     (int)entry->api.blt.dst_bpp);
+        }
+
+        /* The counters are unsigned; print them with %u via explicit casts
+         * so the conversion matches the argument type. */
+        fprintf (PERFSTAT_LOG_CHANNEL,
+                 "%8u %9u %8.3f %8.3f %8.3f %s\n",
+                 (unsigned int)entry->images_count,
+                 (unsigned int)entry->scanlines_count,
+                 entry->pixels_count / 1000000.0,
+                 entry->time,
+                 speed,
+                 get_implementation_string (entry->imp_type));
+
+        total_images += entry->images_count;
+        total_scanlines += entry->scanlines_count;
+        total_pixels += entry->pixels_count;
+        total_time += entry->time;
+    }
+
+    /* Totals go to the same channel as the rows above. */
+    fprintf (PERFSTAT_LOG_CHANNEL,
+             "Total images : %llu\n", (unsigned long long)total_images);
+    fprintf (PERFSTAT_LOG_CHANNEL,
+             "Total scanlines : %llu\n", (unsigned long long)total_scanlines);
+    fprintf (PERFSTAT_LOG_CHANNEL,
+             "Total pixels : %.3f Mpix\n", total_pixels / 1000000.0);
+    fprintf (PERFSTAT_LOG_CHANNEL,
+             "Total time : %.6f s\n", total_time);
+}
+
+#endif
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 4541bcd..a16628d 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -937,4 +937,50 @@ void pixman_timer_register (pixman_timer_t *timer);
#endif /* PIXMAN_TIMERS */
+/*
+ * Performance statistics analyzer
+ */
+
+/* Return a timestamp in seconds: from gettimeofday() when available,
+ * otherwise from clock().
+ */
+double
+perfstat_get_time (void);
+
+/* Record one composite operation of the given size and elapsed time.
+ * Calls are grouped by implementation, operator and the format/flags of
+ * source, mask and destination.  Ignored when statistics are disabled or
+ * when src or mask is PIXMAN_unknown.
+ */
+void
+perfstat_add_composite (pixman_implementation_type_t imp_type,
+ pixman_op_t op,
+ pixman_format_code_t src,
+ uint32_t src_flags,
+ pixman_format_code_t mask,
+ uint32_t mask_flags,
+ pixman_format_code_t dest,
+ uint32_t dest_flags,
+ int32_t width,
+ int32_t height,
+ double elapsed);
+
+/* Record one fill operation of the given size and elapsed time.  Calls
+ * are grouped by implementation and bpp.  Ignored when statistics are
+ * disabled.
+ */
+void
+perfstat_add_fill (pixman_implementation_type_t imp_type,
+ int32_t bpp,
+ int32_t width,
+ int32_t height,
+ double elapsed);
+
+/* Record one blt operation of the given size and elapsed time.  Calls are
+ * grouped by implementation, src_bpp and dst_bpp.  Ignored when statistics
+ * are disabled.
+ */
+void
+perfstat_add_blt (pixman_implementation_type_t imp_type,
+ int32_t src_bpp,
+ int32_t dst_bpp,
+ int32_t width,
+ int32_t height,
+ double elapsed);
+
+/* perfstat_is_enabled() tells callers whether statistics are being
+ * collected.  Without PIXMAN_ENABLE_PERFSTAT at build time it is an inline
+ * stub that always returns FALSE; with it, the real function is defined in
+ * pixman-perf-stat.c and reflects the run-time switch.
+ */
+#ifndef PIXMAN_ENABLE_PERFSTAT
+static inline pixman_bool_t
+perfstat_is_enabled (void)
+{
+ return FALSE;
+}
+#else
+pixman_bool_t
+perfstat_is_enabled (void);
+#endif /* PIXMAN_ENABLE_PERFSTAT */
+
#endif /* PIXMAN_PRIVATE_H */
--
1.7.1
More information about the Pixman
mailing list