[Intel-gfx] [PATCH i-g-t 17/18] stats: Add support for quartiles (and thus median)
Damien Lespiau
damien.lespiau at intel.com
Sat Jun 27 08:08:15 PDT 2015
More stuff, quite useful characteristics of a dataset.
Signed-off-by: Damien Lespiau <damien.lespiau at intel.com>
---
lib/igt_stats.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++
lib/igt_stats.h | 5 ++
lib/tests/igt_stats.c | 41 ++++++++++++++++
3 files changed, 177 insertions(+)
diff --git a/lib/igt_stats.c b/lib/igt_stats.c
index 5c1f23f..66fd563 100644
--- a/lib/igt_stats.c
+++ b/lib/igt_stats.c
@@ -23,6 +23,7 @@
*/
#include <math.h>
+#include <stdlib.h>
#include <string.h>
#include "igt_core.h"
@@ -94,6 +95,7 @@ void igt_stats_init(igt_stats_t *stats, unsigned int capacity)
void igt_stats_fini(igt_stats_t *stats)
{
free(stats->values);
+ free(stats->sorted);
}
@@ -158,6 +160,7 @@ void igt_stats_push(igt_stats_t *stats, uint64_t value)
stats->values[stats->n_values++] = value;
stats->mean_variance_valid = false;
+ stats->sorted_array_valid = false;
if (value < stats->min)
stats->min = value;
@@ -220,6 +223,134 @@ uint64_t igt_stats_get_range(igt_stats_t *stats)
return igt_stats_get_max(stats) - igt_stats_get_min(stats);
}
+static int cmp_u64(const void *pa, const void *pb)
+{
+ const uint64_t *a = pa, *b = pb;
+
+ if (*a < *b)
+ return -1;
+ if (*a > *b)
+ return 1;
+ return 0;
+}
+
+static void igt_stats_ensure_sorted_values(igt_stats_t *stats)
+{
+ if (stats->sorted_array_valid)
+ return;
+
+ if (!stats->sorted) {
+ stats->sorted = calloc(stats->capacity, sizeof(*stats->values));
+ igt_assert(stats->sorted);
+ }
+
+ memcpy(stats->sorted, stats->values,
+ sizeof(*stats->values) * stats->n_values);
+
+ qsort(stats->sorted, stats->n_values, sizeof(*stats->values), cmp_u64);
+
+ stats->sorted_array_valid = true;
+}
+
+/*
+ * We use Tukey's hinge for our quartiles determination.
+ * ends (end, lower_end) are exclusive.
+ */
+static double
+igt_stats_get_median_internal(igt_stats_t *stats,
+ unsigned int start, unsigned int end,
+ unsigned int *lower_end /* out */,
+ unsigned int *upper_start /* out */)
+{
+ unsigned int mid, n_values = end - start;
+ double median;
+
+ igt_stats_ensure_sorted_values(stats);
+
+ /* odd number of data points */
+ if (n_values % 2 == 1) {
+ /* median is the value in the middle (actual datum) */
+ mid = start + n_values / 2;
+ median = stats->sorted[mid];
+
+ /* the two halves contain the median value */
+ if (lower_end)
+ *lower_end = mid + 1;
+ if (upper_start)
+ *upper_start = mid;
+
+ /* even number of data points */
+ } else {
+ /*
+ * The middle is in between two indexes, 'mid' points at the
+ * lower one. The median is then the average between those two
+ * values.
+ */
+ mid = start + n_values / 2 - 1;
+ median = (stats->sorted[mid] + stats->sorted[mid + 1]) / 2.;
+
+ if (lower_end)
+ *lower_end = mid + 1;
+ if (upper_start)
+ *upper_start = mid + 1;
+ }
+
+ return median;
+}
+
+/**
+ * igt_stats_get_quartiles:
+ * @stats: An #igt_stats_t instance
+ * @q1: (out): lower or 25th quartile
+ * @q2: (out): median or 50th quartile
+ * @q3: (out): upper or 75th quartile
+ *
+ * Retrieves the [quartiles](https://en.wikipedia.org/wiki/Quartile) of the
+ * @stats dataset.
+ */
+void igt_stats_get_quartiles(igt_stats_t *stats,
+ double *q1, double *q2, double *q3)
+{
+ unsigned int lower_end, upper_start;
+ double ret;
+
+ if (stats->n_values < 3) {
+ if (q1)
+ *q1 = 0.;
+ if (q2)
+ *q2 = 0.;
+ if (q3)
+ *q3 = 0.;
+ return;
+ }
+
+ ret = igt_stats_get_median_internal(stats, 0, stats->n_values,
+ &lower_end, &upper_start);
+ if (q2)
+ *q2 = ret;
+
+ ret = igt_stats_get_median_internal(stats, 0, lower_end, NULL, NULL);
+ if (q1)
+ *q1 = ret;
+
+ ret = igt_stats_get_median_internal(stats, upper_start, stats->n_values,
+ NULL, NULL);
+ if (q3)
+ *q3 = ret;
+}
+
+/**
+ * igt_stats_get_median:
+ * @stats: An #igt_stats_t instance
+ *
+ * Retrieves the median of the @stats dataset.
+ */
+double igt_stats_get_median(igt_stats_t *stats)
+{
+ return igt_stats_get_median_internal(stats, 0, stats->n_values,
+ NULL, NULL);
+}
+
/*
* Algorithm popularised by Knuth in:
*
diff --git a/lib/igt_stats.h b/lib/igt_stats.h
index 46437c4..887fa79 100644
--- a/lib/igt_stats.h
+++ b/lib/igt_stats.h
@@ -41,8 +41,10 @@ typedef struct {
unsigned int capacity;
unsigned int is_population : 1;
unsigned int mean_variance_valid : 1;
+ unsigned int sorted_array_valid : 1;
uint64_t min, max;
double mean, variance;
+ uint64_t *sorted;
} igt_stats_t;
void igt_stats_init(igt_stats_t *stats, unsigned int capacity);
@@ -55,7 +57,10 @@ void igt_stats_push_array(igt_stats_t *stats,
uint64_t igt_stats_get_min(igt_stats_t *stats);
uint64_t igt_stats_get_max(igt_stats_t *stats);
uint64_t igt_stats_get_range(igt_stats_t *stats);
+void igt_stats_get_quartiles(igt_stats_t *stats,
+ double *q1, double *q2, double *q3);
double igt_stats_get_mean(igt_stats_t *stats);
+double igt_stats_get_median(igt_stats_t *stats);
double igt_stats_get_variance(igt_stats_t *stats);
double igt_stats_get_std_deviation(igt_stats_t *stats);
diff --git a/lib/tests/igt_stats.c b/lib/tests/igt_stats.c
index 468bba2..c4c6776 100644
--- a/lib/tests/igt_stats.c
+++ b/lib/tests/igt_stats.c
@@ -25,6 +25,8 @@
#include "igt_core.h"
#include "igt_stats.h"
+#define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
+
static void push_fixture_1(igt_stats_t *stats)
{
igt_stats_push(stats, 2);
@@ -78,6 +80,44 @@ static void test_range(void)
igt_assert(igt_stats_get_range(&stats) == 8);
}
+/*
+ * Examples taken from: https://en.wikipedia.org/wiki/Quartile
+ * The values are shifted a bit to test we do indeed start by sorting data
+ * set.
+ */
+static void test_quartiles(void)
+{
+ static const uint64_t s1[] =
+ { 47, 49, 6, 7, 15, 36, 39, 40, 41, 42, 43 };
+ static const uint64_t s2[] = { 40, 41, 7, 15, 36, 39 };
+ igt_stats_t stats;
+ double q1, q2, q3;
+
+ /* s1, odd number of data points */
+ igt_stats_init(&stats, ARRAY_SIZE(s1));
+ igt_stats_push_array(&stats, s1, ARRAY_SIZE(s1));
+
+ igt_stats_get_quartiles(&stats, &q1, &q2, &q3);
+ igt_assert_eq_double(q1, 25.5);
+ igt_assert_eq_double(q2, 40);
+ igt_assert_eq_double(q3, 42.5);
+ igt_assert_eq_double(igt_stats_get_median(&stats), 40);
+
+ igt_stats_fini(&stats);
+
+ /* s1, even number of data points */
+ igt_stats_init(&stats, ARRAY_SIZE(s2));
+ igt_stats_push_array(&stats, s2, ARRAY_SIZE(s2));
+
+ igt_stats_get_quartiles(&stats, &q1, &q2, &q3);
+ igt_assert_eq_double(q1, 15);
+ igt_assert_eq_double(q2, 37.5);
+ igt_assert_eq_double(q3, 40);
+ igt_assert_eq_double(igt_stats_get_median(&stats), 37.5);
+
+ igt_stats_fini(&stats);
+}
+
static void test_mean(void)
{
igt_stats_t stats;
@@ -150,6 +190,7 @@ igt_simple_main
test_init();
test_min_max();
test_range();
+ test_quartiles();
test_mean();
test_invalidate_mean();
test_std_deviation();
--
2.1.0
More information about the Intel-gfx
mailing list