[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v4 09/14] qdist: add module to represent frequency d
From: |
Emilio G. Cota |
Subject: |
[Qemu-devel] [PATCH v4 09/14] qdist: add module to represent frequency distributions of data |
Date: |
Fri, 29 Apr 2016 23:33:12 -0400 |
Sometimes it is useful to have a quick histogram to represent a certain
distribution -- for example, when investigating a performance regression
in a hash table due to inadequate hashing.
The appended allows us to easily represent a distribution using Unicode
characters. Further, the data structure keeping track of the distribution
is so simple that obtaining its values for off-line processing is trivial.
Example, taking the last 10 commits to QEMU:
Characters in commit title Count
-----------------------------------
39 1
48 1
53 1
54 2
57 1
61 1
67 1
78 1
80 1
qdist_init(&dist);
qdist_inc(&dist, 39);
[...]
qdist_inc(&dist, 80);
char *str = qdist_pr(&dist, 9, QDIST_PR_LABELS);
// -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0]
g_free(str);
char *str = qdist_pr(&dist, 4, QDIST_PR_LABELS);
// -> [39.0,49.2)▁█▁▁[69.8,80.0]
g_free(str);
Signed-off-by: Emilio G. Cota <address@hidden>
---
include/qemu/qdist.h | 62 +++++++++
util/Makefile.objs | 2 +-
util/qdist.c | 386 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 449 insertions(+), 1 deletion(-)
create mode 100644 include/qemu/qdist.h
create mode 100644 util/qdist.c
diff --git a/include/qemu/qdist.h b/include/qemu/qdist.h
new file mode 100644
index 0000000..6d8b701
--- /dev/null
+++ b/include/qemu/qdist.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016, Emilio G. Cota <address@hidden>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_QDIST_H
+#define QEMU_QDIST_H
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/bitops.h"
+
+/*
+ * Samples with the same 'x value' end up in the same qdist_entry,
+ * e.g. inc(0.1) and inc(0.1) end up as {x=0.1, count=2}.
+ *
+ * Binning happens only at print time, so that we retain the flexibility to
+ * choose the binning. This might not be ideal for workloads that do not care
+ * much about precision and insert many samples all with different x values;
+ * in that case, pre-binning (e.g. entering both 0.115 and 0.097 as 0.1)
+ * should be considered.
+ */
+struct qdist_entry {
+ double x;
+ unsigned long count;
+};
+
+struct qdist {
+ struct qdist_entry *entries;
+ size_t n;
+};
+
+#define QDIST_PR_BORDER BIT(0)
+#define QDIST_PR_LABELS BIT(1)
+/* the remaining options only work if PR_LABELS is set */
+#define QDIST_PR_NODECIMAL BIT(2)
+#define QDIST_PR_PERCENT BIT(3)
+#define QDIST_PR_100X BIT(4)
+#define QDIST_PR_NOBINRANGE BIT(5)
+
+void qdist_init(struct qdist *dist);
+void qdist_destroy(struct qdist *dist);
+
+void qdist_add(struct qdist *dist, double x, long count);
+void qdist_inc(struct qdist *dist, double x);
+double qdist_xmin(const struct qdist *dist);
+double qdist_xmax(const struct qdist *dist);
+double qdist_avg(const struct qdist *dist);
+unsigned long qdist_sample_count(const struct qdist *dist);
+size_t qdist_unique_entries(const struct qdist *dist);
+
+/* callers must free the returned string with g_free() */
+char *qdist_pr_plain(const struct qdist *dist, size_t n_groups);
+
+/* callers must free the returned string with g_free() */
+char *qdist_pr(const struct qdist *dist, size_t n_groups, uint32_t opt);
+
+/* Only qdist code and test code should ever call this function */
+void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n);
+
+#endif /* QEMU_QDIST_H */
diff --git a/util/Makefile.objs b/util/Makefile.objs
index a8a777e..5985a2e 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -1,4 +1,4 @@
-util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
+util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o qdist.o
util-obj-$(CONFIG_POSIX) += compatfd.o
util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
util-obj-$(CONFIG_POSIX) += mmap-alloc.o
diff --git a/util/qdist.c b/util/qdist.c
new file mode 100644
index 0000000..3343640
--- /dev/null
+++ b/util/qdist.c
@@ -0,0 +1,386 @@
+/*
+ * qdist.c - QEMU helpers for handling frequency distributions of data.
+ *
+ * Copyright (C) 2016, Emilio G. Cota <address@hidden>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/qdist.h"
+
+#include <math.h>
+#ifndef NAN
+#define NAN (0.0 / 0.0)
+#endif
+
+void qdist_init(struct qdist *dist)
+{
+ dist->entries = NULL;
+ dist->n = 0;
+}
+
+void qdist_destroy(struct qdist *dist)
+{
+ g_free(dist->entries);
+}
+
+static inline int qdist_cmp_double(double a, double b)
+{
+ if (a > b) {
+ return 1;
+ } else if (a < b) {
+ return -1;
+ }
+ return 0;
+}
+
+static int qdist_cmp(const void *ap, const void *bp)
+{
+ const struct qdist_entry *a = ap;
+ const struct qdist_entry *b = bp;
+
+ return qdist_cmp_double(a->x, b->x);
+}
+
+void qdist_add(struct qdist *dist, double x, long count)
+{
+ struct qdist_entry *entry = NULL;
+
+ if (dist->entries) {
+ struct qdist_entry e;
+
+ e.x = x;
+ entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
+ }
+
+ if (entry) {
+ entry->count += count;
+ return;
+ }
+
+ dist->entries = g_realloc(dist->entries,
+ sizeof(*dist->entries) * (dist->n + 1));
+ dist->n++;
+ entry = &dist->entries[dist->n - 1];
+ entry->x = x;
+ entry->count = count;
+ qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
+}
+
+void qdist_inc(struct qdist *dist, double x)
+{
+ qdist_add(dist, x, 1);
+}
+
+/*
+ * Unicode for block elements. See:
+ * https://en.wikipedia.org/wiki/Block_Elements
+ */
+static const gunichar qdist_blocks[] = {
+ 0x2581,
+ 0x2582,
+ 0x2583,
+ 0x2584,
+ 0x2585,
+ 0x2586,
+ 0x2587,
+ 0x2588
+};
+
+#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
+
+/*
+ * Print a distribution into a string.
+ *
+ * This function assumes that appropriate binning has been done on the input;
+ * see qdist_bin__internal() and qdist_pr_plain().
+ *
+ * Callers must free the returned string with g_free().
+ */
+static char *qdist_pr_internal(const struct qdist *dist)
+{
+ double min, max, step;
+ GString *s = g_string_new("");
+ size_t i;
+
+ /* if only one entry, its printout will be either full or empty */
+ if (dist->n == 1) {
+ if (dist->entries[0].count) {
+ g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
+ } else {
+ g_string_append_c(s, ' ');
+ }
+ goto out;
+ }
+
+ /* get min and max counts */
+ min = dist->entries[0].count;
+ max = min;
+ for (i = 0; i < dist->n; i++) {
+ struct qdist_entry *e = &dist->entries[i];
+
+ if (e->count < min) {
+ min = e->count;
+ }
+ if (e->count > max) {
+ max = e->count;
+ }
+ }
+
+ /* floor((count - min) * step) will give us the block index */
+ step = (QDIST_NR_BLOCK_CODES - 1) / (max - min);
+
+ for (i = 0; i < dist->n; i++) {
+ struct qdist_entry *e = &dist->entries[i];
+ int index;
+
+ /* make an exception with 0; instead of using block[0], print a space
*/
+ if (e->count) {
+ index = (int)((e->count - min) * step);
+ g_string_append_unichar(s, qdist_blocks[index]);
+ } else {
+ g_string_append_c(s, ' ');
+ }
+ }
+ out:
+ return g_string_free(s, FALSE);
+}
+
+/*
+ * Bin the distribution in @from into @n bins of consecutive, non-overlapping
+ * intervals, copying the result to @to.
+ *
+ * This function is internal to qdist: only this file and test code should
+ * ever call it.
+ *
+ * Note: calling this function on an already-binned qdist is a bug.
+ *
+ * If @n == 0 or @from->n == 1, use @from->n.
+ */
+void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
+{
+ double xmin, xmax;
+ double step;
+ size_t i, j, j_min;
+
+ qdist_init(to);
+
+ if (!from->entries) {
+ return;
+ }
+ if (!n || from->n == 1) {
+ n = from->n;
+ }
+
+ /* set equally-sized bins between @from's left and right */
+ xmin = qdist_xmin(from);
+ xmax = qdist_xmax(from);
+ step = (xmax - xmin) / n;
+
+ if (n == from->n) {
+ /* if @from's entries are equally spaced, no need to re-bin */
+ for (i = 0; i < from->n; i++) {
+ if (from->entries[i].x != xmin + i * step) {
+ goto rebin;
+ }
+ }
+ /* they're equally spaced, so copy the dist and bail out */
+ to->entries = g_malloc(sizeof(*to->entries) * from->n);
+ to->n = from->n;
+ memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
+ return;
+ }
+
+ rebin:
+ j_min = 0;
+ for (i = 0; i < n; i++) {
+ double x;
+ double left, right;
+
+ left = xmin + i * step;
+ right = xmin + (i + 1) * step;
+
+ /* Add x, even if it might not get any counts later */
+ x = left;
+ qdist_add(to, x, 0);
+
+ /*
+ * To avoid double-counting we capture [left, right) ranges, except for
+ * the righmost bin, which captures a [left, right] range.
+ */
+ for (j = j_min; j < from->n; j++) {
+ struct qdist_entry *o = &from->entries[j];
+
+ /* entries are ordered so do not check beyond right */
+ if (o->x > right) {
+ break;
+ }
+ if (o->x >= left && (o->x < right ||
+ (i == n - 1 && o->x == right))) {
+ qdist_add(to, x, o->count);
+ /* don't check this entry again */
+ j_min = j + 1;
+ }
+ }
+ }
+}
+
+/*
+ * Print @dist into a string, after re-binning it into @n bins of consecutive,
+ * non-overlapping intervals.
+ *
+ * If @n == 0, use @orig->n.
+ *
+ * Callers must free the returned string with g_free().
+ */
+char *qdist_pr_plain(const struct qdist *dist, size_t n)
+{
+ struct qdist binned;
+ char *ret;
+
+ if (!dist->entries) {
+ return NULL;
+ }
+ qdist_bin__internal(&binned, dist, n);
+ ret = qdist_pr_internal(&binned);
+ qdist_destroy(&binned);
+ return ret;
+}
+
+static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
+ uint32_t opt, bool is_left)
+{
+ const char *percent;
+ const char *lparen;
+ const char *rparen;
+ GString *s;
+ double x1, x2, step;
+ double x;
+ double n;
+ int dec;
+
+ s = g_string_new("");
+ if (!(opt & QDIST_PR_LABELS)) {
+ goto out;
+ }
+
+ dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
+ percent = opt & QDIST_PR_PERCENT ? "%" : "";
+
+ n = n_bins ? n_bins : dist->n;
+ x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
+ step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
+
+ if (opt & QDIST_PR_100X) {
+ x *= 100.0;
+ step *= 100.0;
+ }
+ if (opt & QDIST_PR_NOBINRANGE) {
+ lparen = rparen = "";
+ x1 = x;
+ x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
+ } else {
+ lparen = "[";
+ rparen = is_left ? ")" : "]";
+ if (is_left) {
+ x1 = x;
+ x2 = x + step;
+ } else {
+ x1 = x - step;
+ x2 = x;
+ }
+ }
+ g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
+ if (!(opt & QDIST_PR_NOBINRANGE)) {
+ g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
+ }
+ g_string_append(s, percent);
+ out:
+ return g_string_free(s, FALSE);
+}
+
+/*
+ * Print the distribution's histogram into a string.
+ *
+ * See also: qdist_pr_plain().
+ *
+ * Callers must free the returned string with g_free().
+ */
+char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
+{
+ const char *border = opt & QDIST_PR_BORDER ? "|" : "";
+ char *llabel, *rlabel;
+ char *hgram;
+ GString *s;
+
+ if (dist->entries == NULL) {
+ return NULL;
+ }
+
+ s = g_string_new("");
+
+ llabel = qdist_pr_label(dist, n_bins, opt, true);
+ rlabel = qdist_pr_label(dist, n_bins, opt, false);
+ hgram = qdist_pr_plain(dist, n_bins);
+ g_string_append_printf(s, "%s%s%s%s%s",
+ llabel, border, hgram, border, rlabel);
+ g_free(llabel);
+ g_free(rlabel);
+ g_free(hgram);
+
+ return g_string_free(s, FALSE);
+}
+
+static inline double qdist_x(const struct qdist *dist, int index)
+{
+ if (dist->entries == NULL) {
+ return NAN;
+ }
+ return dist->entries[index].x;
+}
+
+double qdist_xmin(const struct qdist *dist)
+{
+ return qdist_x(dist, 0);
+}
+
+double qdist_xmax(const struct qdist *dist)
+{
+ return qdist_x(dist, dist->n - 1);
+}
+
+size_t qdist_unique_entries(const struct qdist *dist)
+{
+ return dist->n;
+}
+
+unsigned long qdist_sample_count(const struct qdist *dist)
+{
+ unsigned long count = 0;
+ size_t i;
+
+ for (i = 0; i < dist->n; i++) {
+ struct qdist_entry *e = &dist->entries[i];
+
+ count += e->count;
+ }
+ return count;
+}
+
+double qdist_avg(const struct qdist *dist)
+{
+ unsigned long count;
+ size_t i;
+ double ret = 0;
+
+ count = qdist_sample_count(dist);
+ if (!count) {
+ return NAN;
+ }
+ for (i = 0; i < dist->n; i++) {
+ struct qdist_entry *e = &dist->entries[i];
+
+ ret += e->x * e->count / count;
+ }
+ return ret;
+}
--
2.5.0
- [Qemu-devel] [PATCH v4 03/14] seqlock: rename write_lock/unlock to write_begin/end, (continued)
- [Qemu-devel] [PATCH v4 03/14] seqlock: rename write_lock/unlock to write_begin/end, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 10/14] qdist: add test program, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 05/14] atomics: add atomic_test_and_set, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 11/14] qht: QEMU's fast, resizable and scalable Hash Table, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 06/14] qemu-thread: add simple test-and-set spinlock, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 14/14] translate-all: add tb hash bucket info to 'info jit' dump, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 02/14] seqlock: remove optional mutex, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 07/14] exec: add tb_hash_func5, derived from xxhash, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 12/14] qht: add test program, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 13/14] tb hash: track translated blocks with qht, Emilio G. Cota, 2016/04/29
- [Qemu-devel] [PATCH v4 09/14] qdist: add module to represent frequency distributions of data,
Emilio G. Cota <=