summaryrefslogtreecommitdiff
path: root/lib/ccan/tally
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ccan/tally')
-rw-r--r--lib/ccan/tally/LICENSE165
-rw-r--r--lib/ccan/tally/_info58
-rw-r--r--lib/ccan/tally/tally.c490
-rw-r--r--lib/ccan/tally/tally.h103
-rw-r--r--lib/ccan/tally/test/run-bucket_of.c71
-rw-r--r--lib/ccan/tally/test/run-divlu64.c31
-rw-r--r--lib/ccan/tally/test/run-histogram.c108
-rw-r--r--lib/ccan/tally/test/run-mean.c29
-rw-r--r--lib/ccan/tally/test/run-median.c45
-rw-r--r--lib/ccan/tally/test/run-min-max.c20
-rw-r--r--lib/ccan/tally/test/run-mode.c45
-rw-r--r--lib/ccan/tally/test/run-renormalize.c25
-rw-r--r--lib/ccan/tally/test/run-total.c56
13 files changed, 1246 insertions, 0 deletions
diff --git a/lib/ccan/tally/LICENSE b/lib/ccan/tally/LICENSE
new file mode 100644
index 0000000000..cca7fc278f
--- /dev/null
+++ b/lib/ccan/tally/LICENSE
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/lib/ccan/tally/_info b/lib/ccan/tally/_info
new file mode 100644
index 0000000000..1d67274f5c
--- /dev/null
+++ b/lib/ccan/tally/_info
@@ -0,0 +1,58 @@
+#include <stdio.h>
+#include <string.h>
+#include "config.h"
+
+/**
+ * tally - running tally of integers
+ *
+ * The tally module implements simple analysis of a stream of integers.
+ * Numbers are fed in via tally_add(), and then the mean, median, mode and
+ * a histogram can be read out.
+ *
+ * Example:
+ * #include <stdio.h>
+ * #include <err.h>
+ * #include <ccan/tally/tally.h>
+ *
+ * int main(int argc, char *argv[])
+ * {
+ * struct tally *t;
+ * unsigned int i;
+ * size_t err;
+ * ssize_t val;
+ * char *histogram;
+ *
+ * if (argc < 2)
+ * errx(1, "Usage: %s <number>...\n", argv[0]);
+ *
+ * t = tally_new(100);
+ * for (i = 1; i < argc; i++)
+ * tally_add(t, atol(argv[i]));
+ *
+ * printf("Mean = %zi\n", tally_mean(t));
+ * val = tally_approx_median(t, &err);
+ * printf("Median = %zi (+/- %zu)\n", val, err);
+ * val = tally_approx_mode(t, &err);
+ * printf("Mode = %zi (+/- %zu)\n", val, err);
+ * histogram = tally_histogram(t, 50, 10);
+ * printf("Histogram:\n%s", histogram);
+ * free(histogram);
+ * return 0;
+ * }
+ *
+ * License: LGPL (3 or any later version)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+int main(int argc, char *argv[])
+{
+ if (argc != 2)
+ return 1;
+
+ if (strcmp(argv[1], "depends") == 0) {
+ printf("ccan/build_assert\n");
+ printf("ccan/likely\n");
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/lib/ccan/tally/tally.c b/lib/ccan/tally/tally.c
new file mode 100644
index 0000000000..0d01907955
--- /dev/null
+++ b/lib/ccan/tally/tally.c
@@ -0,0 +1,490 @@
+#include "config.h"
+#include <ccan/tally/tally.h>
+#include <ccan/build_assert/build_assert.h>
+#include <ccan/likely/likely.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+#define SIZET_BITS (sizeof(size_t)*CHAR_BIT)
+
+/* We use power of 2 steps. I tried being tricky, but it got buggy. */
+struct tally {
+ ssize_t min, max;
+ size_t total[2];
+ /* This allows limited frequency analysis. */
+ unsigned buckets, step_bits;
+ size_t counts[1 /* Actually: [buckets] */ ];
+};
+
+struct tally *tally_new(unsigned buckets)
+{
+ struct tally *tally;
+
+ /* There is always 1 bucket. */
+ if (buckets == 0)
+ buckets = 1;
+
+ /* Check for overflow. */
+ if (buckets && SIZE_MAX / buckets < sizeof(tally->counts[0]))
+ return NULL;
+ tally = malloc(sizeof(*tally) + sizeof(tally->counts[0])*(buckets-1));
+ if (tally) {
+ tally->max = ((size_t)1 << (SIZET_BITS - 1));
+ tally->min = ~tally->max;
+ tally->total[0] = tally->total[1] = 0;
+ tally->buckets = buckets;
+ tally->step_bits = 0;
+ memset(tally->counts, 0, sizeof(tally->counts[0])*buckets);
+ }
+ return tally;
+}
+
+static unsigned bucket_of(ssize_t min, unsigned step_bits, ssize_t val)
+{
+ /* Don't over-shift. */
+ if (step_bits == SIZET_BITS)
+ return 0;
+ assert(step_bits < SIZET_BITS);
+ return (size_t)(val - min) >> step_bits;
+}
+
+/* Return the min value in bucket b. */
+static ssize_t bucket_min(ssize_t min, unsigned step_bits, unsigned b)
+{
+ /* Don't over-shift. */
+ if (step_bits == SIZET_BITS)
+ return min;
+ assert(step_bits < SIZET_BITS);
+ return min + ((ssize_t)b << step_bits);
+}
+
+/* Does shifting by this many bits truncate the number? */
+static bool shift_overflows(size_t num, unsigned bits)
+{
+ if (bits == 0)
+ return false;
+
+ return ((num << bits) >> 1) != (num << (bits - 1));
+}
+
+/* When min or max change, we may need to shuffle the frequency counts. */
+static void renormalize(struct tally *tally,
+ ssize_t new_min, ssize_t new_max)
+{
+ size_t range, spill;
+ unsigned int i, old_min;
+
+ /* Uninitialized? Don't do anything... */
+ if (tally->max < tally->min)
+ goto update;
+
+ /* If we don't have sufficient range, increase step bits until
+ * buckets cover entire range of ssize_t anyway. */
+ range = (new_max - new_min) + 1;
+ while (!shift_overflows(tally->buckets, tally->step_bits)
+ && range > ((size_t)tally->buckets << tally->step_bits)) {
+ /* Collapse down. */
+ for (i = 1; i < tally->buckets; i++) {
+ tally->counts[i/2] += tally->counts[i];
+ tally->counts[i] = 0;
+ }
+ tally->step_bits++;
+ }
+
+ /* Now if minimum has dropped, move buckets up. */
+ old_min = bucket_of(new_min, tally->step_bits, tally->min);
+ memmove(tally->counts + old_min,
+ tally->counts,
+ sizeof(tally->counts[0]) * (tally->buckets - old_min));
+ memset(tally->counts, 0, sizeof(tally->counts[0]) * old_min);
+
+ /* If we moved boundaries, adjust buckets to that ratio. */
+ spill = (tally->min - new_min) % (1 << tally->step_bits);
+ for (i = 0; i < tally->buckets-1; i++) {
+ size_t adjust = (tally->counts[i] >> tally->step_bits) * spill;
+ tally->counts[i] -= adjust;
+ tally->counts[i+1] += adjust;
+ }
+
+update:
+ tally->min = new_min;
+ tally->max = new_max;
+}
+
+void tally_add(struct tally *tally, ssize_t val)
+{
+ ssize_t new_min = tally->min, new_max = tally->max;
+ bool need_renormalize = false;
+
+ if (val < tally->min) {
+ new_min = val;
+ need_renormalize = true;
+ }
+ if (val > tally->max) {
+ new_max = val;
+ need_renormalize = true;
+ }
+ if (need_renormalize)
+ renormalize(tally, new_min, new_max);
+
+ /* 128-bit arithmetic! If we didn't want exact mean, we could just
+ * pull it out of counts. */
+ if (val > 0 && tally->total[0] + val < tally->total[0])
+ tally->total[1]++;
+ else if (val < 0 && tally->total[0] + val > tally->total[0])
+ tally->total[1]--;
+ tally->total[0] += val;
+ tally->counts[bucket_of(tally->min, tally->step_bits, val)]++;
+}
+
+size_t tally_num(const struct tally *tally)
+{
+ size_t i, num = 0;
+ for (i = 0; i < tally->buckets; i++)
+ num += tally->counts[i];
+ return num;
+}
+
+ssize_t tally_min(const struct tally *tally)
+{
+ return tally->min;
+}
+
+ssize_t tally_max(const struct tally *tally)
+{
+ return tally->max;
+}
+
+/* FIXME: Own ccan module please! */
+static unsigned fls64(uint64_t val)
+{
+#if HAVE_BUILTIN_CLZL
+ if (val <= ULONG_MAX) {
+ /* This is significantly faster! */
+ return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0;
+ } else {
+#endif
+ uint64_t r = 64;
+
+ if (!val)
+ return 0;
+ if (!(val & 0xffffffff00000000ull)) {
+ val <<= 32;
+ r -= 32;
+ }
+ if (!(val & 0xffff000000000000ull)) {
+ val <<= 16;
+ r -= 16;
+ }
+ if (!(val & 0xff00000000000000ull)) {
+ val <<= 8;
+ r -= 8;
+ }
+ if (!(val & 0xf000000000000000ull)) {
+ val <<= 4;
+ r -= 4;
+ }
+ if (!(val & 0xc000000000000000ull)) {
+ val <<= 2;
+ r -= 2;
+ }
+ if (!(val & 0x8000000000000000ull)) {
+ val <<= 1;
+ r -= 1;
+ }
+ return r;
+#if HAVE_BUILTIN_CLZL
+ }
+#endif
+}
+
+/* This is stolen straight from Hacker's Delight. */
+static uint64_t divlu64(uint64_t u1, uint64_t u0, uint64_t v)
+{
+ const uint64_t b = 4294967296ULL; // Number base (32 bits).
+ uint32_t un[4], // Dividend and divisor
+ vn[2]; // normalized and broken
+ // up into halfwords.
+ uint32_t q[2]; // Quotient as halfwords.
+ uint64_t un1, un0, // Dividend and divisor
+ vn0; // as fullwords.
+ uint64_t qhat; // Estimated quotient digit.
+ uint64_t rhat; // A remainder.
+ uint64_t p; // Product of two digits.
+ int64_t s, i, j, t, k;
+
+ if (u1 >= v) // If overflow, return the largest
+ return (uint64_t)-1; // possible quotient.
+
+ s = 64 - fls64(v); // 0 <= s <= 63.
+ vn0 = v << s; // Normalize divisor.
+ vn[1] = vn0 >> 32; // Break divisor up into
+ vn[0] = vn0 & 0xFFFFFFFF; // two 32-bit halves.
+
+ // Shift dividend left.
+ un1 = ((u1 << s) | (u0 >> (64 - s))) & (-s >> 63);
+ un0 = u0 << s;
+ un[3] = un1 >> 32; // Break dividend up into
+ un[2] = un1; // four 32-bit halfwords
+ un[1] = un0 >> 32; // Note: storing into
+ un[0] = un0; // halfwords truncates.
+
+ for (j = 1; j >= 0; j--) {
+ // Compute estimate qhat of q[j].
+ qhat = (un[j+2]*b + un[j+1])/vn[1];
+ rhat = (un[j+2]*b + un[j+1]) - qhat*vn[1];
+ again:
+ if (qhat >= b || qhat*vn[0] > b*rhat + un[j]) {
+ qhat = qhat - 1;
+ rhat = rhat + vn[1];
+ if (rhat < b) goto again;
+ }
+
+ // Multiply and subtract.
+ k = 0;
+ for (i = 0; i < 2; i++) {
+ p = qhat*vn[i];
+ t = un[i+j] - k - (p & 0xFFFFFFFF);
+ un[i+j] = t;
+ k = (p >> 32) - (t >> 32);
+ }
+ t = un[j+2] - k;
+ un[j+2] = t;
+
+ q[j] = qhat; // Store quotient digit.
+ if (t < 0) { // If we subtracted too
+ q[j] = q[j] - 1; // much, add back.
+ k = 0;
+ for (i = 0; i < 2; i++) {
+ t = un[i+j] + vn[i] + k;
+ un[i+j] = t;
+ k = t >> 32;
+ }
+ un[j+2] = un[j+2] + k;
+ }
+ } // End j.
+
+ return q[1]*b + q[0];
+}
+
+static int64_t divls64(int64_t u1, uint64_t u0, int64_t v)
+{
+ int64_t q, uneg, vneg, diff, borrow;
+
+ uneg = u1 >> 63; // -1 if u < 0.
+ if (uneg) { // Compute the absolute
+ u0 = -u0; // value of the dividend u.
+ borrow = (u0 != 0);
+ u1 = -u1 - borrow;
+ }
+
+ vneg = v >> 63; // -1 if v < 0.
+ v = (v ^ vneg) - vneg; // Absolute value of v.
+
+ if ((uint64_t)u1 >= (uint64_t)v)
+ goto overflow;
+
+ q = divlu64(u1, u0, v);
+
+ diff = uneg ^ vneg; // Negate q if signs of
+ q = (q ^ diff) - diff; // u and v differed.
+
+ if ((diff ^ q) < 0 && q != 0) { // If overflow, return the largest
+ overflow: // possible neg. quotient.
+ q = 0x8000000000000000ULL;
+ }
+ return q;
+}
+
+ssize_t tally_mean(const struct tally *tally)
+{
+ size_t count = tally_num(tally);
+ if (!count)
+ return 0;
+
+ if (sizeof(tally->total[0]) == sizeof(uint32_t)) {
+ /* Use standard 64-bit arithmetic. */
+ int64_t total = tally->total[0]
+ | (((uint64_t)tally->total[1]) << 32);
+ return total / count;
+ }
+ return divls64(tally->total[1], tally->total[0], count);
+}
+
+ssize_t tally_total(const struct tally *tally, ssize_t *overflow)
+{
+ if (overflow) {
+ *overflow = tally->total[1];
+ return tally->total[0];
+ }
+
+ /* If result is negative, make sure we can represent it. */
+ if (tally->total[1] & ((size_t)1 << (SIZET_BITS-1))) {
+ /* Must have only underflowed once, and must be able to
+ * represent result at ssize_t. */
+ if ((~tally->total[1])+1 != 0
+ || (ssize_t)tally->total[0] >= 0) {
+ /* Underflow, return minimum. */
+ return (ssize_t)((size_t)1 << (SIZET_BITS - 1));
+ }
+ } else {
+ /* Result is positive, must not have overflowed, and must be
+ * able to represent as ssize_t. */
+ if (tally->total[1] || (ssize_t)tally->total[0] < 0) {
+ /* Overflow. Return maximum. */
+ return (ssize_t)~((size_t)1 << (SIZET_BITS - 1));
+ }
+ }
+ return tally->total[0];
+}
+
+static ssize_t bucket_range(const struct tally *tally, unsigned b, size_t *err)
+{
+ ssize_t min, max;
+
+ min = bucket_min(tally->min, tally->step_bits, b);
+ if (b == tally->buckets - 1)
+ max = tally->max;
+ else
+ max = bucket_min(tally->min, tally->step_bits, b+1) - 1;
+
+ /* FIXME: Think harder about cumulative error; is this enough?. */
+ *err = (max - min + 1) / 2;
+ /* Avoid overflow. */
+ return min + (max - min) / 2;
+}
+
+ssize_t tally_approx_median(const struct tally *tally, size_t *err)
+{
+ size_t count = tally_num(tally), total = 0;
+ unsigned int i;
+
+ for (i = 0; i < tally->buckets; i++) {
+ total += tally->counts[i];
+ if (total * 2 >= count)
+ break;
+ }
+ return bucket_range(tally, i, err);
+}
+
+ssize_t tally_approx_mode(const struct tally *tally, size_t *err)
+{
+ unsigned int i, min_best = 0, max_best = 0;
+
+ for (i = 0; i < tally->buckets; i++) {
+ if (tally->counts[i] > tally->counts[min_best]) {
+ min_best = max_best = i;
+ } else if (tally->counts[i] == tally->counts[min_best]) {
+ max_best = i;
+ }
+ }
+
+ /* We can have more than one best, making our error huge. */
+ if (min_best != max_best) {
+ ssize_t min, max;
+ min = bucket_range(tally, min_best, err);
+ max = bucket_range(tally, max_best, err);
+ max += *err;
+ *err += (size_t)(max - min);
+ return min + (max - min) / 2;
+ }
+
+ return bucket_range(tally, min_best, err);
+}
+
+static unsigned get_max_bucket(const struct tally *tally)
+{
+ unsigned int i;
+
+ for (i = tally->buckets; i > 0; i--)
+ if (tally->counts[i-1])
+ break;
+ return i;
+}
+
+char *tally_histogram(const struct tally *tally,
+ unsigned width, unsigned height)
+{
+ unsigned int i, count, max_bucket, largest_bucket;
+ struct tally *tmp;
+ char *graph, *p;
+
+ assert(width >= TALLY_MIN_HISTO_WIDTH);
+ assert(height >= TALLY_MIN_HISTO_HEIGHT);
+
+ /* Ignore unused buckets. */
+ max_bucket = get_max_bucket(tally);
+
+ /* FIXME: It'd be nice to smooth here... */
+ if (height >= max_bucket) {
+ height = max_bucket;
+ tmp = NULL;
+ } else {
+ /* We create a temporary then renormalize so < height. */
+ /* FIXME: Antialias properly! */
+ tmp = tally_new(tally->buckets);
+ if (!tmp)
+ return NULL;
+ tmp->min = tally->min;
+ tmp->max = tally->max;
+ tmp->step_bits = tally->step_bits;
+ memcpy(tmp->counts, tally->counts,
+ sizeof(tally->counts[0]) * tmp->buckets);
+ while ((max_bucket = get_max_bucket(tmp)) >= height)
+ renormalize(tmp, tmp->min, tmp->max * 2);
+ /* Restore max */
+ tmp->max = tally->max;
+ tally = tmp;
+ height = max_bucket;
+ }
+
+ /* Figure out longest line, for scale. */
+ largest_bucket = 0;
+ for (i = 0; i < tally->buckets; i++) {
+ if (tally->counts[i] > largest_bucket)
+ largest_bucket = tally->counts[i];
+ }
+
+ p = graph = malloc(height * (width + 1) + 1);
+ if (!graph) {
+ free(tmp);
+ return NULL;
+ }
+
+ for (i = 0; i < height; i++) {
+ unsigned covered = 1, row;
+
+ /* People expect minimum at the bottom. */
+ row = height - i - 1;
+ count = (double)tally->counts[row] / largest_bucket * (width-1)+1;
+
+ if (row == 0)
+ covered = snprintf(p, width, "%zi", tally->min);
+ else if (row == height - 1)
+ covered = snprintf(p, width, "%zi", tally->max);
+ else if (row == bucket_of(tally->min, tally->step_bits, 0))
+ *p = '+';
+ else
+ *p = '|';
+
+ if (covered > width)
+ covered = width;
+ p += covered;
+
+ if (count > covered)
+ count -= covered;
+ else
+ count = 0;
+
+ memset(p, '*', count);
+ p += count;
+ *p = '\n';
+ p++;
+ }
+ *p = '\0';
+ free(tmp);
+ return graph;
+}
diff --git a/lib/ccan/tally/tally.h b/lib/ccan/tally/tally.h
new file mode 100644
index 0000000000..e715717507
--- /dev/null
+++ b/lib/ccan/tally/tally.h
@@ -0,0 +1,103 @@
+#ifndef CCAN_TALLY_H
+#define CCAN_TALLY_H
+#include <stdlib.h>
+
+struct tally;
+
+/**
+ * tally_new - allocate the tally structure.
+ * @buckets: the number of frequency buckets.
+ *
+ * This allocates a tally structure using malloc(). The greater the value
+ * of @buckets, the more accurate tally_approx_median() and tally_approx_mode()
+ * and tally_histogram() will be, but more memory is consumed. If you want
+ * to use tally_histogram(), the optimal bucket value is the same as that
+ * @height argument.
+ */
+struct tally *tally_new(unsigned int buckets);
+
+/**
+ * tally_add - add a value.
+ * @tally: the tally structure.
+ * @val: the value to add.
+ */
+void tally_add(struct tally *tally, ssize_t val);
+
+/**
+ * tally_num - how many times as tally_add been called?
+ * @tally: the tally structure.
+ */
+size_t tally_num(const struct tally *tally);
+
+/**
+ * tally_min - the minimum value passed to tally_add.
+ * @tally: the tally structure.
+ *
+ * Undefined if tally_num() == 0.
+ */
+ssize_t tally_min(const struct tally *tally);
+
+/**
+ * tally_max - the maximum value passed to tally_add.
+ * @tally: the tally structure.
+ *
+ * Undefined if tally_num() == 0.
+ */
+ssize_t tally_max(const struct tally *tally);
+
+/**
+ * tally_mean - the mean value passed to tally_add.
+ * @tally: the tally structure.
+ *
+ * Undefined if tally_num() == 0, but will not crash.
+ */
+ssize_t tally_mean(const struct tally *tally);
+
+/**
+ * tally_total - the total value passed to tally_add.
+ * @tally: the tally structure.
+ * @overflow: the overflow value (or NULL).
+ *
+ * If your total can't overflow a ssize_t, you don't need @overflow.
+ * Otherwise, @overflow is the upper ssize_t, and the return value should
+ * be treated as the lower size_t (ie. the sign bit is in @overflow).
+ */
+ssize_t tally_total(const struct tally *tally, ssize_t *overflow);
+
+/**
+ * tally_approx_median - the approximate median value passed to tally_add.
+ * @tally: the tally structure.
+ * @err: the error in the returned value (ie. real median is +/- @err).
+ *
+ * Undefined if tally_num() == 0, but will not crash. Because we
+ * don't reallocate, we don't store all values, so this median cannot be
+ * exact.
+ */
+ssize_t tally_approx_median(const struct tally *tally, size_t *err);
+
+/**
+ * tally_approx_mode - the approximate mode value passed to tally_add.
+ * @tally: the tally structure.
+ * @err: the error in the returned value (ie. real mode is +/- @err).
+ *
+ * Undefined if tally_num() == 0, but will not crash. Because we
+ * don't reallocate, we don't store all values, so this mode cannot be
+ * exact. It could well be a value which was never passed to tally_add!
+ */
+ssize_t tally_approx_mode(const struct tally *tally, size_t *err);
+
+#define TALLY_MIN_HISTO_WIDTH 8
+#define TALLY_MIN_HISTO_HEIGHT 3
+
+/**
+ * tally_graph - return an ASCII image of the tally_add distribution
+ * @tally: the tally structure.
+ * @width: the maximum string width to use (>= TALLY_MIN_HISTO_WIDTH)
+ * @height: the maximum string height to use (>= TALLY_MIN_HISTO_HEIGHT)
+ *
+ * Returns a malloc()ed string which draws a multi-line graph of the
+ * distribution of values. On out of memory returns NULL.
+ */
+char *tally_histogram(const struct tally *tally,
+ unsigned width, unsigned height);
+#endif /* CCAN_TALLY_H */
diff --git a/lib/ccan/tally/test/run-bucket_of.c b/lib/ccan/tally/test/run-bucket_of.c
new file mode 100644
index 0000000000..5e12725757
--- /dev/null
+++ b/lib/ccan/tally/test/run-bucket_of.c
@@ -0,0 +1,71 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ unsigned int i, max_step;
+ ssize_t min, max;
+
+ max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1));
+ min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1));
+ max_step = sizeof(max)*CHAR_BIT;
+
+ plan_tests(2 + 100 + 10 + 5
+ + 2 + 100 + 5 + 4
+ + (1 << 7) * (max_step - 7));
+
+ /* Single step, single bucket == easy. */
+ ok1(bucket_of(0, 0, 0) == 0);
+
+ /* Double step, still in first bucket. */
+ ok1(bucket_of(0, 1, 0) == 0);
+
+ /* Step 8. */
+ for (i = 0; i < 100; i++)
+ ok1(bucket_of(0, 3, i) == i >> 3);
+
+ /* 10 values in 5 buckets, step 2. */
+ for (i = 0; i < 10; i++)
+ ok1(bucket_of(0, 1, i) == i >> 1);
+
+ /* Extreme cases. */
+ ok1(bucket_of(min, 0, min) == 0);
+ ok1(bucket_of(min, max_step-1, min) == 0);
+ ok1(bucket_of(min, max_step-1, max) == 1);
+ ok1(bucket_of(min, max_step, min) == 0);
+ ok1(bucket_of(min, max_step, max) == 0);
+
+ /* Now, bucket_min() should match: */
+ ok1(bucket_min(0, 0, 0) == 0);
+
+ /* Double step, val in first bucket still 0. */
+ ok1(bucket_min(0, 1, 0) == 0);
+
+ /* Step 8. */
+ for (i = 0; i < 100; i++)
+ ok1(bucket_min(0, 3, i) == i << 3);
+
+ /* 10 values in 5 buckets, step 2. */
+ for (i = 0; i < 5; i++)
+ ok1(bucket_min(0, 1, i) == i << 1);
+
+ /* Extreme cases. */
+ ok1(bucket_min(min, 0, 0) == min);
+ ok1(bucket_min(min, max_step-1, 0) == min);
+ ok1(bucket_min(min, max_step-1, 1) == 0);
+ ok1(bucket_min(min, max_step, 0) == min);
+
+ /* Now, vary step and number of buckets, but bucket_min and bucket_of
+ * must agree. */
+ for (i = 0; i < (1 << 7); i++) {
+ unsigned int j;
+ for (j = 0; j < max_step - 7; j++) {
+ ssize_t val;
+
+ val = bucket_min(-(ssize_t)i, j, i);
+ ok1(bucket_of(-(ssize_t)i, j, val) == i);
+ }
+ }
+
+ return exit_status();
+}
diff --git a/lib/ccan/tally/test/run-divlu64.c b/lib/ccan/tally/test/run-divlu64.c
new file mode 100644
index 0000000000..057e47432c
--- /dev/null
+++ b/lib/ccan/tally/test/run-divlu64.c
@@ -0,0 +1,31 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ unsigned int i, j;
+
+ plan_tests(5985);
+ /* Simple tests. */
+ for (i = 0; i < 127; i++) {
+ uint64_t u1, u0;
+ if (i < 64) {
+ u1 = 0;
+ u0 = 1ULL << i;
+ j = 0;
+ } else {
+ u1 = 1ULL << (i - 64);
+ u0 = 0;
+ j = i - 63;
+ }
+ for (; j < 63; j++) {
+ uint64_t answer;
+ if (j > i)
+ answer = 0;
+ else
+ answer = 1ULL << (i - j);
+ ok1(divlu64(u1, u0, 1ULL << j) == answer);
+ }
+ }
+ return exit_status();
+}
diff --git a/lib/ccan/tally/test/run-histogram.c b/lib/ccan/tally/test/run-histogram.c
new file mode 100644
index 0000000000..a9894ecd85
--- /dev/null
+++ b/lib/ccan/tally/test/run-histogram.c
@@ -0,0 +1,108 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ int i;
+ struct tally *tally;
+ char *graph, *p;
+
+ plan_tests(100 + 1 + 10 + 1 + 100 + 1 + 10 + 1 + 10 * 2 + 1);
+
+ /* Uniform distribution, easy. */
+ tally = tally_new(100);
+ for (i = 0; i < 100; i++)
+ tally_add(tally, i);
+
+ /* 1:1 height. */
+ graph = p = tally_histogram(tally, 20, 100);
+ for (i = 0; i < 100; i++) {
+ char *eol = strchr(p, '\n');
+
+ /* We expect it filled all way to the end. */
+ ok1(eol - p == 20);
+ p = eol + 1;
+ }
+ ok1(!*p);
+ free(graph);
+
+ /* Reduced height. */
+ graph = p = tally_histogram(tally, 20, 10);
+ for (i = 0; i < 10; i++) {
+ char *eol = strchr(p, '\n');
+
+ /* First once can be truncated (bucket aliasing) */
+ if (eol) {
+ ok1(eol - p == 20 || (eol - p < 20 && i == 0));
+ } else
+ /* We should, at worst, half-fill graph */
+ ok1(i > 5);
+
+ if (eol)
+ p = eol + 1;
+ }
+ ok1(!*p);
+ free(graph);
+
+ /* Enlarged height (gets capped). */
+ graph = p = tally_histogram(tally, 20, 1000);
+ for (i = 0; i < 100; i++) {
+ char *eol = strchr(p, '\n');
+ /* We expect it filled all way to the end. */
+ ok1(eol - p == 20);
+ p = eol + 1;
+ }
+ ok1(!*p);
+ free(graph);
+ free(tally);
+
+ /* Distinctive increasing pattern. */
+ tally = tally_new(10);
+ for (i = 0; i < 10; i++) {
+ unsigned int j;
+ for (j = 0; j <= i; j++)
+ tally_add(tally, i);
+ }
+
+ graph = p = tally_histogram(tally, 10, 10);
+ for (i = 0; i < 10; i++) {
+ char *eol = strchr(p, '\n');
+ ok1(eol - p == 10 - i);
+ p = eol + 1;
+ }
+ ok1(!*p);
+ diag("Here's the pretty: %s", graph);
+ free(graph);
+ free(tally);
+
+ /* With negative values. */
+ tally = tally_new(10);
+ for (i = 0; i < 10; i++) {
+ tally_add(tally, i - 5);
+ }
+
+ graph = p = tally_histogram(tally, 10, 10);
+ for (i = 0; i < 10; i++) {
+ char *eol = strchr(p, '\n');
+
+ /* We expect it filled all way to the end. */
+ ok1(eol - p == 10);
+
+ /* Check min/max labels. */
+ if (i == 0)
+ ok1(strncmp(p, "4*", 2) == 0);
+ else if (i == 9)
+ ok1(strncmp(p, "-5*", 3) == 0);
+ else if (i == 4)
+ ok1(p[0] == '+'); /* 0 marker */
+ else
+ ok1(p[0] == '|');
+ p = eol + 1;
+ }
+ ok1(!*p);
+ diag("Here's the pretty: %s", graph);
+ free(graph);
+ free(tally);
+
+ return exit_status();
+}
diff --git a/lib/ccan/tally/test/run-mean.c b/lib/ccan/tally/test/run-mean.c
new file mode 100644
index 0000000000..a4a67cf9c8
--- /dev/null
+++ b/lib/ccan/tally/test/run-mean.c
@@ -0,0 +1,29 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ int i;
+ struct tally *tally = tally_new(0);
+ ssize_t min, max;
+
+ max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1));
+ min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1));
+
+ plan_tests(100 + 100);
+ /* Simple mean test: should always be 0. */
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, i);
+ tally_add(tally, -i);
+ ok1(tally_mean(tally) == 0);
+ }
+
+ /* Works for big values too... */
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, max - i);
+ tally_add(tally, min + 1 + i);
+ ok1(tally_mean(tally) == 0);
+ }
+
+ return exit_status();
+}
diff --git a/lib/ccan/tally/test/run-median.c b/lib/ccan/tally/test/run-median.c
new file mode 100644
index 0000000000..f461c2ec8f
--- /dev/null
+++ b/lib/ccan/tally/test/run-median.c
@@ -0,0 +1,45 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ int i;
+ struct tally *tally = tally_new(100);
+ ssize_t min, max, median;
+ size_t err;
+
+ max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1));
+ min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1));
+
+ plan_tests(100*2 + 100*2 + 100*2);
+ /* Simple median test: should always be around 0. */
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, i);
+ tally_add(tally, -i);
+ median = tally_approx_median(tally, &err);
+ ok1(err <= 4);
+ ok1(median - (ssize_t)err <= 0 && median + (ssize_t)err >= 0);
+ }
+
+ /* Works for big values too... */
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, max - i);
+ tally_add(tally, min + 1 + i);
+ median = tally_approx_median(tally, &err);
+ /* Error should be < 100th of max - min. */
+ ok1(err <= max / 100 * 2);
+ ok1(median - (ssize_t)err <= 0 && median + (ssize_t)err >= 0);
+ }
+ free(tally);
+
+ tally = tally_new(10);
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, i);
+ median = tally_approx_median(tally, &err);
+ ok1(err <= i / 10 + 1);
+ ok1(median - (ssize_t)err <= i/2
+ && median + (ssize_t)err >= i/2);
+ }
+
+ return exit_status();
+}
diff --git a/lib/ccan/tally/test/run-min-max.c b/lib/ccan/tally/test/run-min-max.c
new file mode 100644
index 0000000000..ffb0f2eff4
--- /dev/null
+++ b/lib/ccan/tally/test/run-min-max.c
@@ -0,0 +1,20 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ int i;
+ struct tally *tally = tally_new(0);
+
+ plan_tests(100 * 4);
+ /* Test max, min and num. */
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, i);
+ ok1(tally_num(tally) == i*2 + 1);
+ tally_add(tally, -i);
+ ok1(tally_num(tally) == i*2 + 2);
+ ok1(tally_max(tally) == i);
+ ok1(tally_min(tally) == -i);
+ }
+ return exit_status();
+}
diff --git a/lib/ccan/tally/test/run-mode.c b/lib/ccan/tally/test/run-mode.c
new file mode 100644
index 0000000000..7216738bfb
--- /dev/null
+++ b/lib/ccan/tally/test/run-mode.c
@@ -0,0 +1,45 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ int i;
+ struct tally *tally = tally_new(100);
+ ssize_t min, max, mode;
+ size_t err;
+
+ max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1));
+ min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1));
+
+ plan_tests(100 + 50 + 100 + 100 + 10);
+ /* Simple mode test: should always be around 0 (we add that twice). */
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, i);
+ tally_add(tally, -i);
+ mode = tally_approx_mode(tally, &err);
+ if (i < 50)
+ ok1(err == 0);
+ ok1(mode - (ssize_t)err <= 0 && mode + (ssize_t)err >= 0);
+ }
+
+ /* Works for big values too... */
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, max - i);
+ tally_add(tally, min + 1 + i);
+ mode = tally_approx_mode(tally, &err);
+ ok1(mode - (ssize_t)err <= 0 && mode + (ssize_t)err >= 0);
+ }
+ free(tally);
+
+ tally = tally_new(10);
+ tally_add(tally, 0);
+ for (i = 0; i < 100; i++) {
+ tally_add(tally, i);
+ mode = tally_approx_mode(tally, &err);
+ if (i < 10)
+ ok1(err == 0);
+ ok1(mode - (ssize_t)err <= 0 && mode + (ssize_t)err >= 0);
+ }
+
+ return exit_status();
+}
diff --git a/lib/ccan/tally/test/run-renormalize.c b/lib/ccan/tally/test/run-renormalize.c
new file mode 100644
index 0000000000..7c141528d2
--- /dev/null
+++ b/lib/ccan/tally/test/run-renormalize.c
@@ -0,0 +1,25 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ struct tally *tally = tally_new(2);
+
+ plan_tests(4);
+ tally->min = 0;
+ tally->max = 0;
+ tally->counts[0] = 1;
+
+ /* This renormalize should do nothing. */
+ renormalize(tally, 0, 1);
+ ok1(tally->counts[0] == 1);
+ ok1(tally->counts[1] == 0);
+ tally->counts[1]++;
+
+ /* This renormalize should collapse both into bucket 0. */
+ renormalize(tally, 0, 3);
+ ok1(tally->counts[0] == 2);
+ ok1(tally->counts[1] == 0);
+
+ return exit_status();
+}
diff --git a/lib/ccan/tally/test/run-total.c b/lib/ccan/tally/test/run-total.c
new file mode 100644
index 0000000000..d7d73e58a5
--- /dev/null
+++ b/lib/ccan/tally/test/run-total.c
@@ -0,0 +1,56 @@
+#include <ccan/tally/tally.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+ struct tally *tally;
+ ssize_t total, overflow;
+ ssize_t min, max;
+
+ max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1));
+ min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1));
+
+ plan_tests(15);
+
+ /* Simple case. */
+ tally = tally_new(0);
+ tally_add(tally, min);
+ ok1(tally_total(tally, NULL) == min);
+ ok1(tally_total(tally, &overflow) == min);
+ ok1(overflow == -1);
+
+ /* Underflow. */
+ tally_add(tally, min);
+ total = tally_total(tally, &overflow);
+ ok1(overflow == -1);
+ ok1((size_t)total == 0);
+ ok1(tally_total(tally, NULL) == min);
+ free(tally);
+
+ /* Simple case. */
+ tally = tally_new(0);
+ tally_add(tally, max);
+ ok1(tally_total(tally, NULL) == max);
+ ok1(tally_total(tally, &overflow) == max);
+ ok1(overflow == 0);
+
+ /* Overflow into sign bit... */
+ tally_add(tally, max);
+ total = tally_total(tally, &overflow);
+ ok1(overflow == 0);
+ ok1((size_t)total == (size_t)-2);
+ ok1(tally_total(tally, NULL) == max);
+
+ /* Overflow into upper size_t. */
+ tally_add(tally, max);
+ total = tally_total(tally, &overflow);
+ ok1(overflow == 1);
+ if (sizeof(size_t) == 4)
+ ok1((size_t)total == 0x7FFFFFFD);
+ else if (sizeof(size_t) == 8)
+ ok1((size_t)total == 0x7FFFFFFFFFFFFFFDULL);
+ ok1(tally_total(tally, NULL) == max);
+ free(tally);
+
+ return exit_status();
+}