[Pixman] [PATCH 4/8] perfstat: Add performance statistics analyzer

Taekyun Kim podain77 at gmail.com
Mon Sep 19 07:08:22 PDT 2011


From: Taekyun Kim <tkq.kim at samsung.com>

The performance statistics analyzer gathers performance data for
each composite path and reports the results at the end of the
program. This is useful for profiling other applications' use of
pixman. The report includes the number of images, scanlines and pixels
composited by each path and the total time spent in that path.

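The reporting code is compiled in when the PIXMAN_ENABLE_PERFSTAT macro
is defined. At run time, turn the analyzer on by setting the environment
variable "PIXMAN_ENABLE_PERF_STAT" (this is the name the code passes to
getenv(); note the extra underscore compared with the macro name).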
---
 pixman/Makefile.sources   |    1 +
 pixman/pixman-perf-stat.c |  541 +++++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-private.h   |   46 ++++
 3 files changed, 588 insertions(+), 0 deletions(-)
 create mode 100644 pixman/pixman-perf-stat.c

diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index ca3f001..24be76f 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -24,6 +24,7 @@ libpixman_sources =			\
 	pixman-timer.c			\
 	pixman-trap.c			\
 	pixman-utils.c			\
+	pixman-perf-stat.c		\
 	$(NULL)
 
 libpixman_headers =			\
diff --git a/pixman/pixman-perf-stat.c b/pixman/pixman-perf-stat.c
new file mode 100644
index 0000000..2986777
--- /dev/null
+++ b/pixman/pixman-perf-stat.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright © 2011 SCore Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:  Siarhei Siamashka (siarhei.siamashka at nokia.com)
+ * Author:  Taekyun Kim (podain77 at gmail.com)
+ */
+
+/* Performance statistics analyzer.
+ * This tool accumulates performance data for each composite path and
+ * reports the result at the end of the program.
+ *
+ * TODO: Guard against toolchains that do not support the constructor attribute
+ * TODO: Use debug channel rather than stdout
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef HAVE_GETTIMEOFDAY
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+
+#define PERFSTAT_MAX_ENTRIES	1024
+#define PERFSTAT_LOG_CHANNEL	stdout
+
+typedef enum
+{
+    PERFSTAT_COMPOSITE,
+    PERFSTAT_FILL,
+    PERFSTAT_BLT,
+} perfstat_type_t;
+
+typedef struct perfstat_entry	    perfstat_entry_t;
+typedef struct perfstat_composite   perfstat_composite_t;
+typedef struct perfstat_fill	    perfstat_fill_t;
+typedef struct perfstat_blt	    perfstat_blt_t;
+
+struct perfstat_composite
+{
+    pixman_op_t			    op;
+    pixman_format_code_t	    src;
+    uint32_t			    src_flags;
+    pixman_format_code_t	    mask;
+    uint32_t			    mask_flags;
+    pixman_format_code_t	    dest;
+    uint32_t			    dest_flags;
+};
+
+struct perfstat_fill
+{
+    int32_t			    bpp;
+};
+
+struct perfstat_blt
+{
+    int32_t			    src_bpp;
+    int32_t			    dst_bpp;
+};
+
+struct perfstat_entry
+{
+    perfstat_type_t		    type;
+    pixman_implementation_type_t    imp_type;
+
+    /* API info. */
+    union
+    {
+	perfstat_composite_t    composite;
+	perfstat_fill_t	        fill;
+	perfstat_blt_t		blt;
+    } api;
+
+    /* Performance data. */
+    uint32_t			    images_count;
+    uint32_t			    scanlines_count;
+    uint64_t			    pixels_count;
+    double			    time;
+};
+
+static pixman_mutex_t perfstat_mutex;
+static perfstat_entry_t perfstat[PERFSTAT_MAX_ENTRIES];
+static int perfstat_entry_count = 0;
+static pixman_bool_t enable_perfstat = FALSE;
+
+double
+perfstat_get_time (void)
+{
+#ifdef HAVE_GETTIMEOFDAY
+    struct timeval tv;
+
+    gettimeofday (&tv, NULL);
+    return (double)((int64_t)tv.tv_sec * 1000000 + tv.tv_usec) / 1000000.;
+#else
+    return (double)clock() / (double)CLOCKS_PER_SEC;
+#endif
+}
+
+void
+perfstat_add_composite (pixman_implementation_type_t	imp_type,
+			pixman_op_t			op,
+			pixman_format_code_t		src,
+			uint32_t			src_flags,
+			pixman_format_code_t		mask,
+			uint32_t			mask_flags,
+			pixman_format_code_t		dest,
+			uint32_t			dest_flags,
+			int32_t				width,
+			int32_t				height,
+			double				elapsed)
+{
+    int i;
+
+    if (src == PIXMAN_unknown || mask == PIXMAN_unknown)
+	return;
+
+    if (!enable_perfstat)
+	return;
+
+    PIXMAN_MUTEX_LOCK (perfstat_mutex);
+
+    for (i = 0; i < perfstat_entry_count; ++i)
+    {
+	if (perfstat[i].type == PERFSTAT_COMPOSITE &&
+	    perfstat[i].imp_type == imp_type &&
+	    perfstat[i].api.composite.op == op &&
+	    perfstat[i].api.composite.src == src &&
+	    perfstat[i].api.composite.src_flags == src_flags &&
+	    perfstat[i].api.composite.mask == mask &&
+	    perfstat[i].api.composite.mask_flags == mask_flags &&
+	    perfstat[i].api.composite.dest == dest &&
+	    perfstat[i].api.composite.dest_flags == dest_flags)
+	{
+	    perfstat[i].images_count++;
+	    perfstat[i].scanlines_count += height;
+	    perfstat[i].pixels_count += (uint64_t)width * height;
+	    perfstat[i].time += elapsed;
+
+	    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+	    return;
+	}
+    }
+
+    if (perfstat_entry_count >= PERFSTAT_MAX_ENTRIES)
+    {
+	PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+	return;
+    }
+
+    perfstat[perfstat_entry_count].type = PERFSTAT_COMPOSITE;
+    perfstat[perfstat_entry_count].imp_type = imp_type;
+    perfstat[perfstat_entry_count].api.composite.op = op;
+    perfstat[perfstat_entry_count].api.composite.src = src;
+    perfstat[perfstat_entry_count].api.composite.src_flags = src_flags;
+    perfstat[perfstat_entry_count].api.composite.mask = mask;
+    perfstat[perfstat_entry_count].api.composite.mask_flags = mask_flags;
+    perfstat[perfstat_entry_count].api.composite.dest = dest;
+    perfstat[perfstat_entry_count].api.composite.dest_flags = dest_flags;
+
+    perfstat[perfstat_entry_count].images_count = 1;
+    perfstat[perfstat_entry_count].scanlines_count = height;
+    perfstat[perfstat_entry_count].pixels_count = (uint64_t)width * height;
+    perfstat[perfstat_entry_count].time = elapsed;
+
+    perfstat_entry_count++;
+
+    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+}
+
+void
+perfstat_add_fill (pixman_implementation_type_t	imp_type,
+		   int32_t			bpp,
+		   int32_t			width,
+		   int32_t			height,
+		   double			elapsed)
+{
+    int i;
+
+    if (!enable_perfstat)
+	return;
+
+    PIXMAN_MUTEX_LOCK (perfstat_mutex);
+
+    for (i = 0; i < perfstat_entry_count; ++i)
+    {
+	if (perfstat[i].type == PERFSTAT_FILL &&
+	    perfstat[i].imp_type == imp_type &&
+	    perfstat[i].api.fill.bpp == bpp)
+	{
+	    perfstat[i].images_count++;
+	    perfstat[i].scanlines_count += height;
+	    perfstat[i].pixels_count += (uint64_t)width * height;
+	    perfstat[i].time += elapsed;
+
+	    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+	    return;
+	}
+    }
+
+    if (perfstat_entry_count >= PERFSTAT_MAX_ENTRIES)
+    {
+	PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+	return;
+    }
+
+    perfstat[perfstat_entry_count].type = PERFSTAT_FILL;
+    perfstat[perfstat_entry_count].imp_type = imp_type;
+    perfstat[perfstat_entry_count].api.fill.bpp = bpp;
+
+    perfstat[perfstat_entry_count].images_count = 1;
+    perfstat[perfstat_entry_count].scanlines_count = height;
+    perfstat[perfstat_entry_count].pixels_count = (uint64_t)width * height;
+    perfstat[perfstat_entry_count].time = elapsed;
+
+    perfstat_entry_count++;
+
+    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+}
+
+void
+perfstat_add_blt (pixman_implementation_type_t	imp_type,
+		  int32_t			src_bpp,
+		  int32_t			dst_bpp,
+		  int32_t			width,
+		  int32_t			height,
+		  double			elapsed)
+{
+    int i;
+
+    if (!enable_perfstat)
+	return;
+
+    PIXMAN_MUTEX_LOCK (perfstat_mutex);
+
+    for (i = 0; i < perfstat_entry_count; ++i)
+    {
+	if (perfstat[i].type == PERFSTAT_BLT &&
+	    perfstat[i].imp_type == imp_type &&
+	    perfstat[i].api.blt.src_bpp == src_bpp &&
+	    perfstat[i].api.blt.dst_bpp == dst_bpp)
+	{
+	    perfstat[i].images_count++;
+	    perfstat[i].scanlines_count += height;
+	    perfstat[i].pixels_count += (uint64_t)width * height;
+	    perfstat[i].time += elapsed;
+
+	    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+	    return;
+	}
+    }
+
+    if (perfstat_entry_count >= PERFSTAT_MAX_ENTRIES)
+    {
+	PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+	return;
+    }
+
+    perfstat[perfstat_entry_count].type = PERFSTAT_BLT;
+    perfstat[perfstat_entry_count].imp_type = imp_type;
+    perfstat[perfstat_entry_count].api.blt.src_bpp = src_bpp;
+    perfstat[perfstat_entry_count].api.blt.dst_bpp = dst_bpp;
+
+    perfstat[perfstat_entry_count].images_count = 1;
+    perfstat[perfstat_entry_count].scanlines_count = height;
+    perfstat[perfstat_entry_count].pixels_count = (uint64_t)width * height;
+    perfstat[perfstat_entry_count].time = elapsed;
+
+    perfstat_entry_count++;
+
+    PIXMAN_MUTEX_UNLOCK (perfstat_mutex);
+
+}
+
+#ifdef PIXMAN_ENABLE_PERFSTAT
+
+static int
+compare (const void *p1, const void *p2)
+{
+    const perfstat_entry_t *e1 = p1;
+    const perfstat_entry_t *e2 = p2;
+
+    if (e1->time > e2->time)
+	return -1;
+    else if (e1->time == e2->time)
+	return 0;
+    else
+	return 1;
+}
+
+static const char*
+get_implementation_string (pixman_implementation_type_t imp_type)
+{
+    switch (imp_type)
+    {
+    case PIXMAN_IMPLEMENTATION_UNKNOWN:
+	return " Unknown";
+    case PIXMAN_IMPLEMENTATION_GENERAL:
+	return " General";
+    case PIXMAN_IMPLEMENTATION_C_FAST_PATH:
+	return "  C fast";
+    case PIXMAN_IMPLEMENTATION_SSE2:
+	return "    SSE2";
+    case PIXMAN_IMPLEMENTATION_MMX:
+	return "     MMX";
+    case PIXMAN_IMPLEMENTATION_ARM_SIMD:
+	return "ARM SIMD";
+    case PIXMAN_IMPLEMENTATION_ARM_NEON:
+	return "ARM NEON";
+    default:
+	break;
+    }
+
+    return "Unknown ";
+}
+
+static const char*
+get_op_string (pixman_op_t op)
+{
+    switch (op)
+    {
+    case PIXMAN_OP_CLEAR:
+	return "   clear";
+    case PIXMAN_OP_SRC:
+	return "     src";
+    case PIXMAN_OP_DST:
+	return "     dst";
+    case PIXMAN_OP_OVER:
+	return "    over";
+    case PIXMAN_OP_OVER_REVERSE:
+	return "over_rev";
+    case PIXMAN_OP_IN:
+	return "      in";
+    case PIXMAN_OP_IN_REVERSE:
+	return "  in_rev";
+    case PIXMAN_OP_OUT:
+	return "     out";
+    case PIXMAN_OP_OUT_REVERSE:
+	return " out_rev";
+    case PIXMAN_OP_ATOP:
+	return "    atop";
+    case PIXMAN_OP_ATOP_REVERSE:
+	return "atop_rev";
+    case PIXMAN_OP_XOR:
+	return "     xor";
+    case PIXMAN_OP_ADD:
+	return "     add";
+    case PIXMAN_OP_SATURATE:
+	return "     sat";
+    default:
+	return "     etc";
+    };
+}
+
+static const char*
+get_format_string (pixman_format_code_t format)
+{
+    switch ((uint32_t)format)
+    {
+    case PIXMAN_null:
+	return "null";
+    case PIXMAN_solid:
+	return "   n";
+    case PIXMAN_pixbuf:
+	return " pix";
+    case PIXMAN_rpixbuf:
+	return "rpix";
+    case PIXMAN_any:
+	return " any";
+    case PIXMAN_a1:
+	return "   1";
+    case PIXMAN_a8:
+	return "   8";
+    case PIXMAN_r5g6b5:
+	return "0565";
+    case PIXMAN_r8g8b8:
+	return "0888";
+    case PIXMAN_x8r8g8b8:
+	return "x888";
+    case PIXMAN_a8r8g8b8:
+	return "8888";
+    default:
+	return " etc";
+    }
+}
+
+static const char*
+get_transform_string (uint32_t flag)
+{
+    if (flag & FAST_PATH_ID_TRANSFORM)
+	return "non-scaled";
+    else if (flag & FAST_PATH_SCALE_TRANSFORM)
+    {
+	if (flag & FAST_PATH_NEAREST_FILTER)
+	    return "   nearest";
+	else if (flag & FAST_PATH_BILINEAR_FILTER)
+	    return "  bilinear";
+    }
+    else if (flag & FAST_PATH_ROTATE_90_TRANSFORM)
+	return " rotate_90";
+    else if (flag & FAST_PATH_ROTATE_180_TRANSFORM)
+	return "rotate_180";
+    else if (flag & FAST_PATH_ROTATE_270_TRANSFORM)
+	return "rotate_270";
+
+    return "    affine";
+}
+
+static const char*
+get_repeat_string (uint32_t flag)
+{
+    if (flag & FAST_PATH_SAMPLES_COVER_CLIP)
+	return "  cover";
+    else if ((flag & FAST_PATH_NORMAL_REPEAT) == FAST_PATH_NORMAL_REPEAT)
+	return " normal";
+     else if ((flag & FAST_PATH_PAD_REPEAT) == FAST_PATH_PAD_REPEAT)
+	return "    pad";
+    else if ((flag & FAST_PATH_NONE_REPEAT) == FAST_PATH_NONE_REPEAT)
+	return "   none";
+    else if ((flag & FAST_PATH_REFLECT_REPEAT) == FAST_PATH_REFLECT_REPEAT)
+	return "reflect";
+
+    return "unknown";
+}
+
+pixman_bool_t
+perfstat_is_enabled (void)
+{
+    return enable_perfstat;
+}
+
+static void __attribute__((constructor))
+perfstat_constructor (void)
+{
+    if (getenv ("PIXMAN_ENABLE_PERF_STAT"))
+	enable_perfstat = TRUE;
+
+    PIXMAN_MUTEX_INIT (perfstat_mutex);
+}
+
+/* Destructor hook: calls PIXMAN_MUTEX_FINI and, if perfstat is enabled,
+ * prints every entry (sorted by descending accumulated time, see compare())
+ * followed by the grand totals, all to PERFSTAT_LOG_CHANNEL. */
+static void __attribute__((destructor))
+perfstat_destructor (void)
+{
+    int i;
+    unsigned int total_images = 0;	/* unsigned, like the per-entry counters */
+    unsigned int total_scanlines = 0;
+    uint64_t total_pixels = 0;
+    double total_time = 0.0;
+
+    PIXMAN_MUTEX_FINI (perfstat_mutex);
+
+    if (!enable_perfstat)
+	return;
+
+    /* sort entries so the most time-consuming paths are listed first */
+    qsort (perfstat, perfstat_entry_count, sizeof(perfstat[0]), compare);
+
+    fprintf (PERFSTAT_LOG_CHANNEL,
+	     "[   #]  function    transform       op   src  mask   dst   repeat    "
+	     "images  scanlines  pixels(Mpix)   time(s)  speed(Mpix/s)   backend\n");
+
+    /* one row per entry: type-specific columns first, then shared counters */
+    for (i = 0; i < perfstat_entry_count; ++i)
+    {
+	fprintf (PERFSTAT_LOG_CHANNEL, "[%4d] ", i);
+
+	if (perfstat[i].type == PERFSTAT_COMPOSITE)
+	{
+	    fprintf (PERFSTAT_LOG_CHANNEL,
+		     "composite : %s %s  %s  %s  %s  %s  ",
+		     get_transform_string (perfstat[i].api.composite.src_flags),
+		     get_op_string (perfstat[i].api.composite.op),
+		     get_format_string (perfstat[i].api.composite.src),
+		     get_format_string (perfstat[i].api.composite.mask),
+		     get_format_string (perfstat[i].api.composite.dest),
+		     get_repeat_string (perfstat[i].api.composite.src_flags));
+	}
+	else if (perfstat[i].type == PERFSTAT_FILL)
+	{
+	    fprintf (PERFSTAT_LOG_CHANNEL,
+		     "     fill : "
+		     "                                 %4d           ",
+		     perfstat[i].api.fill.bpp);
+	}
+	else if (perfstat[i].type == PERFSTAT_BLT)
+	{
+	    fprintf (PERFSTAT_LOG_CHANNEL,
+		     "      blt : "
+		     "                   %4d           %4d          ",
+		     perfstat[i].api.blt.src_bpp, perfstat[i].api.blt.dst_bpp);
+	}
+
+	fprintf (PERFSTAT_LOG_CHANNEL,
+		 "%8u  %9u      %8.3f  %8.3f      %8.3f   %s\n",
+		 (unsigned int) perfstat[i].images_count,
+		 (unsigned int) perfstat[i].scanlines_count,
+		 perfstat[i].pixels_count / 1000000.0, perfstat[i].time,
+		 (perfstat[i].pixels_count / perfstat[i].time) / 1000000.0,
+		 get_implementation_string (perfstat[i].imp_type));
+
+	total_images += perfstat[i].images_count;
+	total_scanlines += perfstat[i].scanlines_count;
+	total_pixels += perfstat[i].pixels_count;
+	total_time += perfstat[i].time;
+    }
+
+    fprintf (PERFSTAT_LOG_CHANNEL, "Total images    : %u\n", total_images);
+    fprintf (PERFSTAT_LOG_CHANNEL, "Total scanlines : %u\n", total_scanlines);
+    fprintf (PERFSTAT_LOG_CHANNEL, "Total pixels    : %.3f Mpix\n", total_pixels / 1000000.0);
+    fprintf (PERFSTAT_LOG_CHANNEL, "Total time      : %.6f s\n", total_time);
+}
+
+#endif
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 4541bcd..a16628d 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -937,4 +937,50 @@ void pixman_timer_register (pixman_timer_t *timer);
 
 #endif /* PIXMAN_TIMERS */
 
+/*
+ * Performance statistics analyzer
+ */
+
+double
+perfstat_get_time (void);
+
+void
+perfstat_add_composite (pixman_implementation_type_t	imp_type,
+			pixman_op_t			op,
+			pixman_format_code_t		src,
+			uint32_t			src_flags,
+			pixman_format_code_t		mask,
+			uint32_t			mask_flags,
+			pixman_format_code_t		dest,
+			uint32_t			dest_flags,
+			int32_t				width,
+			int32_t				height,
+			double				elapsed);
+
+void
+perfstat_add_fill (pixman_implementation_type_t	imp_type,
+		   int32_t			bpp,
+		   int32_t			width,
+		   int32_t			height,
+		   double			elapsed);
+
+void
+perfstat_add_blt (pixman_implementation_type_t	imp_type,
+		  int32_t			src_bpp,
+		  int32_t			dst_bpp,
+		  int32_t			width,
+		  int32_t			height,
+		  double			elapsed);
+
+#ifndef PIXMAN_ENABLE_PERFSTAT
+static inline pixman_bool_t
+perfstat_is_enabled (void)
+{
+    return FALSE;
+}
+#else
+pixman_bool_t
+perfstat_is_enabled (void);
+#endif /* PIXMAN_ENABLE_PERFSTAT */
+
 #endif /* PIXMAN_PRIVATE_H */
-- 
1.7.1



More information about the Pixman mailing list